diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 34d4846c1e83d..ac415d279f215 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,13 +30,10 @@ jobs:
         include:
           - name: "core / utils / tags"
             slug: "core-utils-tags"
-            modules: ":spark-core_2.13,:spark-launcher_2.13,:spark-network-common_2.13,:spark-network-shuffle_2.13,:spark-network-yarn_2.13,:spark-unsafe_2.13,:spark-kvstore_2.13,:spark-tags_2.13,:spark-sketch_2.13,:spark-common-utils_2.13"
-          - name: "graphx / examples / repl"
-            slug: "graphx-examples-repl"
-            modules: ":spark-graphx_2.13,:spark-examples_2.13,:spark-repl_2.13"
-          - name: "catalyst / sql-api / hive-thriftserver"
-            slug: "catalyst-sql-api-hive-thriftserver"
-            modules: ":spark-sql-api_2.13,:spark-catalyst_2.13,:spark-hive-thriftserver_2.13"
+            modules: ":spark-core_2.13,:spark-launcher_2.13,:spark-network-common_2.13,:spark-network-shuffle_2.13,:spark-network-yarn_2.13,:spark-unsafe_2.13,:spark-kvstore_2.13,:spark-tags_2.13,:spark-sketch_2.13,:spark-common-utils_2.13,:spark-common-utils-java_2.13,:spark-udf-worker-core_2.13"
+          - name: "catalyst / sql-api / hive-thriftserver / pipelines / graphx / examples / repl"
+            slug: "catalyst-graphx"
+            modules: ":spark-sql-api_2.13,:spark-catalyst_2.13,:spark-hive-thriftserver_2.13,:spark-pipelines_2.13,:spark-graphx_2.13,:spark-examples_2.13,:spark-repl_2.13"
           - name: "sql - extended tests"
             slug: "sql"
             modules: ":spark-sql_2.13"
@@ -52,10 +49,19 @@ jobs:
           - name: "hive"
             slug: "hive"
             modules: ":spark-hive_2.13"
-          - name: "streaming / mllib / yarn / k8s / connect / protobuf / kafka / avro"
-            slug: "streaming-mllib-yarn-k8s-connect-protobuf-kafka-avro"
-            modules: ":spark-streaming_2.13,:spark-sql-kafka-0-10_2.13,:spark-streaming-kafka-0-10_2.13,:spark-token-provider-kafka-0-10_2.13,:spark-mllib-local_2.13,:spark-mllib_2.13,:spark-yarn_2.13,:spark-kubernetes_2.13,:spark-hadoop-cloud_2.13,:spark-connect_2.13,:spark-connect-common_2.13,:spark-connect-client-jvm_2.13,:spark-protobuf_2.13,:spark-avro_2.13,:spark-assembly_2.13"
+          - name: "mllib"
+            slug: "mllib"
+            modules: ":spark-mllib-local_2.13,:spark-mllib_2.13"
+          - name: "connect / protobuf"
+            slug: "connect-protobuf"
+            modules: ":spark-connect_2.13,:spark-connect-common_2.13,:spark-connect-client-jvm_2.13,:spark-connect-client-jdbc_2.13,:spark-protobuf_2.13"
             extra: -Dtest.exclude.tags=org.apache.spark.tags.AmmoniteTest
+          - name: "streaming / kafka / avro"
+            slug: "streaming-kafka-avro"
+            modules: ":spark-streaming_2.13,:spark-sql-kafka-0-10_2.13,:spark-streaming-kafka-0-10_2.13,:spark-token-provider-kafka-0-10_2.13,:spark-avro_2.13"
+          - name: "yarn / k8s / hadoop-cloud / assembly"
+            slug: "yarn-k8s-hadoop-cloud-assembly"
+            modules: ":spark-yarn_2.13,:spark-kubernetes_2.13,:spark-hadoop-cloud_2.13,:spark-assembly_2.13"
     steps:
       - uses: actions/checkout@v6
 
@@ -77,8 +83,8 @@ jobs:
         run: |
           python3 -m pip install --upgrade pip
           python3 -m pip install 'numpy>=1.20.0' 'pyarrow' 'pandas' 'scipy' \
-            'unittest-xml-reporting' 'grpcio==1.56.0' 'protobuf==4.25.3' \
-            'grpcio-status==1.56.0' 'googleapis-common-protos==1.56.4' \
+            'unittest-xml-reporting' 'grpcio==1.76.0' 'protobuf==6.33.5' \
+            'grpcio-status==1.76.0' 'googleapis-common-protos==1.71.0' \
             'zstandard==0.25.0'
 
       - name: Build dependent modules (compile main+tests, install incl. test-jars)
@@ -149,23 +155,19 @@ jobs:
       matrix:
         include:
           - name: sql
-            modules: pyspark-sql,pyspark-resource,pyspark-testing
-          - name: core
-            modules: pyspark-core,pyspark-streaming
+            modules: pyspark-sql,pyspark-resource,pyspark-testing,pyspark-core,pyspark-errors,pyspark-logger
           - name: ml
-            modules: pyspark-mllib,pyspark-ml
+            modules: pyspark-mllib,pyspark-ml,pyspark-ml-connect,pyspark-pipelines
+          - name: streaming
+            modules: pyspark-streaming,pyspark-structured-streaming,pyspark-structured-streaming-connect
+          - name: connect
+            modules: pyspark-connect
           - name: pandas
             modules: pyspark-pandas
           - name: pandas-slow
             modules: pyspark-pandas-slow
-          - name: connect
-            modules: pyspark-connect
-          - name: pandas-connect
-            modules: pyspark-pandas-connect
-          - name: pandas-slow-connect
-            modules: pyspark-pandas-slow-connect
-          - name: errors
-            modules: pyspark-errors
+          - name: pandas-connect-and-slow
+            modules: pyspark-pandas-connect,pyspark-pandas-slow-connect
     env:
       MODULES_TO_TEST: ${{ matrix.modules }}
       PYTHON_TO_TEST: python3.10
@@ -192,11 +194,12 @@ jobs:
             'numpy==1.26.4' 'pyarrow==18.0.0' 'pandas==2.2.0' 'scipy' \
             'unittest-xml-reporting' 'coverage' \
             'memory-profiler' 'plotly<6' 'matplotlib' \
-            'grpcio==1.56.0' 'grpcio-status==1.56.0' \
-            'protobuf==4.25.3' 'googleapis-common-protos==1.56.4' \
+            'grpcio==1.76.0' 'grpcio-status==1.76.0' \
+            'protobuf==6.33.5' 'googleapis-common-protos==1.71.0' \
             'graphviz>=0.20' 'openpyxl' \
             'scikit-learn==1.1.*' 'mlflow==3.12.0' \
-            'torch==2.0.1' 'torchvision==0.15.2' 'torcheval'
+            'torch==2.5.1' 'torchvision==0.20.1' 'torcheval' \
+            'zstandard==0.25.0'
 
       - name: Build Spark (full reactor including assembly)
         env:
diff --git a/AGENTS.md b/AGENTS.md
index 28272d19fe933..89c8a77d51a00 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -22,7 +22,7 @@ Avoid introducing non-ASCII characters in code or comments. String literals may
 
 ## Build and Test
 
-Build and tests can take a long time. Before running tests, ask the user if they have more changes to make.
+Build and tests can take a long time. If the user explicitly asked to run tests, run them. Otherwise (you are running tests on your own to verify a change), first ask the user if they have more changes to make.
 
 Prefer SBT over Maven for faster incremental compilation. Module names are defined in `project/SparkBuild.scala`.
 
@@ -128,3 +128,9 @@ DO NOT push to the upstream repo. Always push to the personal fork. Open PRs aga
 DO NOT force push or use `--amend` on pushed commits unless the user explicitly asks. If the remote branch has new commits, fetch and rebase before pushing.
 
 Always get user approval before external operations such as pushing commits, creating PRs, or posting comments. Use `gh pr create` to open PRs. If `gh` is not installed, generate the GitHub PR URL for the user and recommend installing the GitHub CLI.
+
+## Security
+
+Security model: [SECURITY.md](./SECURITY.md)
+
+Agents that scan this repository should consult `SECURITY.md` for the project's threat model, in-scope / out-of-scope declarations, and known non-findings before reporting issues.
diff --git a/R/.gitignore b/R/.gitignore
index c98504ab07781..693b2b7c69175 100644
--- a/R/.gitignore
+++ b/R/.gitignore
@@ -6,3 +6,7 @@ pkg/man
 pkg/html
 SparkR.Rcheck/
 SparkR_*.tar.gz
+# Transient copies made by dev/make-distribution.sh while building the package.
+pkg/LICENSE
+pkg/NOTICE
+DESCRIPTION.orig
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 7bbcee567c608..78283891dea81 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 4.2.0.1-4.3.0-0
+Version: 4.2.0.1-4.3.0-1
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
 Authors@R:
diff --git a/README.md b/README.md
index 44a99a8231d31..e4476174203b8 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,6 @@ This README file only contains basic setup instructions.
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml)                           |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14_nogil.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14_nogil.yml)               |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml)                     |
-|            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml)               |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect40.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect40.yml)                 |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect.yml)                     |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_sparkr_window.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_sparkr_window.yml)                       |
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000000000..26b540d81ab53
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,13 @@
+# Security policy
+
+## Reporting a vulnerability
+
+To report a security vulnerability in Apache Spark, follow the ASF security process at <https://www.apache.org/security/>. Reports can be sent privately to security@apache.org or to private@spark.apache.org.
+
+## Threat model
+
+Apache Spark's security guidance for operators — covering authentication, encryption, access control, Kerberos integration, trust boundaries, environmental assumptions, security properties provided and not provided, and downstream operator responsibilities — is documented at:
+
+<https://spark.apache.org/docs/latest/security.html>
+
+Automated security scanning agents should consult that document for the project's in-scope / out-of-scope declarations before reporting issues.
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 440677b45e80f..aba28e2cf858d 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index f1c5ea216d47e..6a936b31f28a0 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 63cb28d42d583..72dc7bef3b5f3 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
index 2edeb3f05c9b0..64edc0edae6f3 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
@@ -23,6 +23,7 @@
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
 
 public class JavaUtilsSuite {
 
@@ -52,6 +53,10 @@ public void testCreateDirectory() throws IOException {
     // 4. The parent directory cannot write
     assertTrue(testDir.canWrite());
     assertTrue(testDir.setWritable(false));
+    // Skip when setWritable(false) has no effect (e.g. running as root,
+    // or on a filesystem that ignores POSIX write bits).
+    assumeFalse(testDir.canWrite(),
+      "setWritable(false) had no effect; skipping write-denied scenario");
     assertThrows(IOException.class,
       () -> JavaUtils.createDirectory(testDirPath, "scenario4"));
     assertTrue(testDir.setWritable(true));
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 317499a1b116f..9b5a916587056 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index ca6631888edae..cf85e7577a759 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index ff6d2c6b4859e..d5d9a986d664d 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 3a6ce00a2c557..b49d6baa14607 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
index f23ca99dbdff6..55bb994fa9b15 100644
--- a/common/unsafe/pom.xml
+++ b/common/unsafe/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index 3e2bfbcd87ca3..8df59b1f6e342 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -110,7 +110,7 @@ public record CollationMeta(
   public static class Collation {
     public final String collationName;
     public final String provider;
-    private final Collator collator;
+    private final ThreadLocal<Collator> threadLocalCollator;
     public final Comparator<UTF8String> comparator;
 
     /**
@@ -187,7 +187,7 @@ public static class Collation {
     public Collation(
         String collationName,
         String provider,
-        Collator collator,
+        ThreadLocal<Collator> threadLocalCollator,
         Comparator<UTF8String> comparator,
         String version,
         Function<UTF8String, byte[]> sortKeyFunction,
@@ -197,7 +197,7 @@ public Collation(
         boolean supportsSpaceTrimming) {
       this.collationName = collationName;
       this.provider = provider;
-      this.collator = collator;
+      this.threadLocalCollator = threadLocalCollator;
       this.comparator = comparator;
       this.version = version;
       this.sortKeyFunction = sortKeyFunction;
@@ -216,7 +216,7 @@ public Collation(
     }
 
     public Collator getCollator() {
-      return collator;
+      return threadLocalCollator != null ? threadLocalCollator.get() : null;
     }
 
     /**
@@ -1016,29 +1016,40 @@ protected Collation buildCollation() {
           builder.setUnicodeLocaleKeyword("ks", "level1");
         }
         ULocale resultLocale = builder.build();
-        Collator collator = Collator.getInstance(resultLocale);
-        // Freeze ICU collator to ensure thread safety.
-        collator.freeze();
+
+        // Use thread-local Collator instances to avoid lock contention.
+        // A frozen RuleBasedCollator serializes all threads through a ReentrantLock on its
+        // internal collation buffer (used by getCollationKey/compare). By creating independent
+        // per-thread instances via Collator.getInstance(), each thread operates on its own
+        // buffer without locking. Each instance is frozen as a mutation guard so that any
+        // accidental call to setStrength() or similar throws immediately.
+        ThreadLocal<Collator> threadLocalCollator = ThreadLocal.withInitial(
+          () -> {
+            Collator collator = Collator.getInstance(resultLocale);
+            collator.freeze();
+            return collator;
+          });
 
         Comparator<UTF8String> comparator;
         Function<UTF8String, byte[]> sortKeyFunction;
 
         if (spaceTrimming == SpaceTrimming.NONE) {
           comparator = (s1, s2) ->
-            collator.compare(s1.toValidString(), s2.toValidString());
-          sortKeyFunction = s -> collator.getCollationKey(s.toValidString()).toByteArray();
+            threadLocalCollator.get().compare(s1.toValidString(), s2.toValidString());
+          sortKeyFunction = s ->
+            threadLocalCollator.get().getCollationKey(s.toValidString()).toByteArray();
         } else {
-          comparator = (s1, s2) -> collator.compare(
+          comparator = (s1, s2) -> threadLocalCollator.get().compare(
             applyTrimmingPolicy(s1, spaceTrimming).toValidString(),
             applyTrimmingPolicy(s2, spaceTrimming).toValidString());
-          sortKeyFunction = s -> collator.getCollationKey(
+          sortKeyFunction = s -> threadLocalCollator.get().getCollationKey(
             applyTrimmingPolicy(s, spaceTrimming).toValidString()).toByteArray();
         }
 
         return new Collation(
           normalizedCollationName(),
           PROVIDER_ICU,
-          collator,
+          threadLocalCollator,
           comparator,
           ICU_VERSION,
           sortKeyFunction,
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/BinaryView.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/BinaryView.java
new file mode 100644
index 0000000000000..998fff77f29e1
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/BinaryView.java
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.KryoSerializable;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.nio.ByteBuffer;
+
+import org.apache.spark.annotation.Unstable;
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.array.ByteArrayMethods;
+import org.apache.spark.unsafe.hash.Murmur3_x86_32;
+
+import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET;
+
+/**
+ * A non-owning view over a contiguous chunk of bytes that may live on-heap or off-heap.
+ * It is intended as the shared physical carrier for opaque-bytes SQL types whose values
+ * can be read directly out of an {@code UnsafeRow} / {@code UnsafeArrayData} /
+ * {@code ColumnVector} backing buffer.
+ * <p>
+ * Lifetime: a {@code BinaryView} is only valid for as long as the memory it points to is
+ * alive. Callers that need to retain a value across the source buffer's lifetime must call
+ * {@link #copy()} first.
+ */
+@Unstable
+public final class BinaryView implements Comparable<BinaryView>, Externalizable, KryoSerializable {
+
+  // null when off-heap; a byte[] (or other primitive array) when on-heap.
+  private Object base;
+  // For on-heap, this is BYTE_ARRAY_OFFSET + index into the array. For off-heap, this is
+  // the absolute native address.
+  private long offset;
+  private int numBytes;
+
+  /** For Externalizable / KryoSerializable only. */
+  public BinaryView() {}
+
+  private BinaryView(Object base, long offset, int numBytes) {
+    this.base = base;
+    this.offset = offset;
+    this.numBytes = numBytes;
+  }
+
+  // ---------- factories ----------
+
+  /**
+   * Creates a view over the given byte array. The array is referenced, not copied; callers
+   * must not mutate it while the returned view is in use.
+   */
+  public static BinaryView fromBytes(byte[] bytes) {
+    if (bytes == null) return null;
+    return new BinaryView(bytes, BYTE_ARRAY_OFFSET, bytes.length);
+  }
+
+  /** Creates a view over a sub-range of the given byte array (no copy). */
+  public static BinaryView fromBytes(byte[] bytes, int offset, int numBytes) {
+    if (bytes == null) return null;
+    return new BinaryView(bytes, BYTE_ARRAY_OFFSET + offset, numBytes);
+  }
+
+  /**
+   * Creates a view at the given Tungsten-style address. {@code base == null} means off-heap
+   * and {@code offset} is the absolute native address; otherwise {@code base} is a JVM
+   * primitive array and {@code offset} is {@code BYTE_ARRAY_OFFSET + index}.
+   */
+  public static BinaryView fromAddress(Object base, long offset, int numBytes) {
+    return new BinaryView(base, offset, numBytes);
+  }
+
+  // ---------- accessors ----------
+
+  /** The backing object: a primitive array when on-heap, or {@code null} when off-heap. */
+  public Object getBaseObject() { return base; }
+
+  /** Tungsten-style offset: see the class javadoc. */
+  public long getBaseOffset() { return offset; }
+
+  public int numBytes() { return numBytes; }
+
+  public boolean isOffHeap() { return base == null; }
+
+  // ---------- random-access primitive reads ----------
+  // Coordinates are relative to the start of this view, i.e. i in [0, numBytes).
+
+  public byte getByte(int i) {
+    assert i >= 0 && i < numBytes : invalidRangeMessage(i, 1);
+    return Platform.getByte(base, offset + i);
+  }
+
+  public short getShort(int i) {
+    assert i >= 0 && i + 2 <= numBytes : invalidRangeMessage(i, 2);
+    return Platform.getShort(base, offset + i);
+  }
+
+  public int getInt(int i) {
+    assert i >= 0 && i + 4 <= numBytes : invalidRangeMessage(i, 4);
+    return Platform.getInt(base, offset + i);
+  }
+
+  public long getLong(int i) {
+    assert i >= 0 && i + 8 <= numBytes : invalidRangeMessage(i, 8);
+    return Platform.getLong(base, offset + i);
+  }
+
+  public float getFloat(int i) {
+    assert i >= 0 && i + 4 <= numBytes : invalidRangeMessage(i, 4);
+    return Platform.getFloat(base, offset + i);
+  }
+
+  public double getDouble(int i) {
+    assert i >= 0 && i + 8 <= numBytes : invalidRangeMessage(i, 8);
+    return Platform.getDouble(base, offset + i);
+  }
+
+  private String invalidRangeMessage(int i, int width) {
+    return "Invalid access at offset " + i + " (width " + width + ") in BinaryView of "
+      + numBytes + " bytes";
+  }
+
+  // ---------- materialization and slicing ----------
+
+  /**
+   * Returns true iff this view owns a tight, on-heap {@code byte[]}: the offset is exactly
+   * {@code BYTE_ARRAY_OFFSET} and the array length equals {@link #numBytes()}. In that case
+   * {@link #getBytes()} returns the live backing array, so mutating it writes through to this
+   * view; otherwise {@code getBytes()} returns a fresh copy. Sliced, sub-range, and off-heap
+   * views are never tight on-heap arrays.
+   */
+  public boolean hasTightOnHeapArray() {
+    return offset == BYTE_ARRAY_OFFSET
+        && base instanceof byte[] bytes
+        && bytes.length == numBytes;
+  }
+
+  /**
+   * Returns the bytes of this view as a {@code byte[]}.
+   * <p>
+   * Mirrors {@link UTF8String#getBytes()}: if this view already owns a tight, on-heap byte
+   * array (see {@link #hasTightOnHeapArray()}), the backing array is returned directly
+   * without copying. Otherwise a fresh array is allocated and the bytes are copied into it.
+   * <p>
+   * The caller must not mutate the returned array, since when this view owns a tight array
+   * it is shared with the view itself. Use {@link #copy()} to obtain an independent owned
+   * value.
+   */
+  public byte[] getBytes() {
+    if (hasTightOnHeapArray()) {
+      return (byte[]) base;
+    }
+    byte[] out = new byte[numBytes];
+    Platform.copyMemory(base, offset, out, BYTE_ARRAY_OFFSET, numBytes);
+    return out;
+  }
+
+  /**
+   * Returns an independent {@code BinaryView} that owns a fresh on-heap byte array
+   * containing this view's data. Use this before storing the value past the source
+   * buffer's lifetime.
+   */
+  public BinaryView copy() {
+    return new BinaryView(copyToNewArray(), BYTE_ARRAY_OFFSET, numBytes);
+  }
+
+  private byte[] copyToNewArray() {
+    byte[] out = new byte[numBytes];
+    Platform.copyMemory(base, offset, out, BYTE_ARRAY_OFFSET, numBytes);
+    return out;
+  }
+
+  /** Returns a sub-view (no copy). */
+  public BinaryView slice(int start, int len) {
+    assert start >= 0 && len >= 0 && start + len <= numBytes
+      : "Invalid slice start=" + start + " len=" + len + " of view with " + numBytes + " bytes";
+    return new BinaryView(base, offset + start, len);
+  }
+
+  /**
+   * Copies this view's bytes to the given target memory address. Used by writers that
+   * already know where the bytes should land (e.g. {@code UnsafeWriter}).
+   */
+  public void writeToMemory(Object target, long targetOffset) {
+    Platform.copyMemory(base, offset, target, targetOffset, numBytes);
+  }
+
+  /**
+   * Wraps this view as a {@link ByteBuffer}. The heap path returns a {@code ByteBuffer.wrap}
+   * around the existing array (zero-copy); the off-heap path materializes a fresh array
+   * because there is no portable way to expose an off-heap address through the public
+   * {@code ByteBuffer} API.
+   */
+  public ByteBuffer toByteBuffer() {
+    if (base instanceof byte[] bytes && offset >= BYTE_ARRAY_OFFSET) {
+      long arrayOffset = offset - BYTE_ARRAY_OFFSET;
+      if ((long) bytes.length < arrayOffset + numBytes) {
+        throw new ArrayIndexOutOfBoundsException();
+      }
+      return ByteBuffer.wrap(bytes, (int) arrayOffset, numBytes);
+    }
+    return ByteBuffer.wrap(copyToNewArray());
+  }
+
+  // ---------- equality / hashing / ordering ----------
+
+  @Override
+  public int hashCode() {
+    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, 42);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) return true;
+    if (other instanceof BinaryView o) {
+      return numBytes == o.numBytes
+        && ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
+    }
+    return false;
+  }
+
+  /** Lexicographic, unsigned byte-wise comparison. */
+  @Override
+  public int compareTo(BinaryView other) {
+    return ByteArray.compareBinary(
+      base, offset, numBytes, other.base, other.offset, other.numBytes);
+  }
+
+  // ---------- serialization ----------
+  // Both paths always materialize an on-heap byte[] on read so that the deserialized view
+  // owns its data; senders may pass a view into another buffer.
+
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeInt(numBytes);
+    if (numBytes > 0) {
+      if (hasTightOnHeapArray()) {
+        out.write((byte[]) base);
+      } else {
+        out.write(copyToNewArray());
+      }
+    }
+  }
+
+  @Override
+  public void readExternal(ObjectInput in) throws IOException {
+    int n = in.readInt();
+    byte[] bytes = new byte[n];
+    in.readFully(bytes);
+    this.base = bytes;
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = n;
+  }
+
+  @Override
+  public void write(Kryo kryo, Output out) {
+    out.writeInt(numBytes);
+    if (numBytes > 0) {
+      if (hasTightOnHeapArray()) {
+        out.write((byte[]) base);
+      } else {
+        out.write(copyToNewArray());
+      }
+    }
+  }
+
+  @Override
+  public void read(Kryo kryo, Input in) {
+    int n = in.readInt();
+    byte[] bytes = new byte[n];
+    in.read(bytes);
+    this.base = bytes;
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = n;
+  }
+}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
deleted file mode 100644
index 48b121ba894a5..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import org.apache.spark.annotation.Unstable;
-
-import java.io.Serializable;
-
-// This class represents the physical type for the GEOGRAPHY data type.
-@Unstable
-public final class GeographyVal implements Comparable<GeographyVal>, Serializable {
-
-  // The GEOGRAPHY type is implemented as a byte array. We provide `getBytes` and `fromBytes`
-  // methods for readers and writers to access this underlying array of bytes.
-  private final byte[] value;
-
-  // We make the constructor private. We should use `fromBytes` to create new instances.
-  private GeographyVal(byte[] value) {
-    this.value = value;
-  }
-
-  public byte[] getBytes() {
-    return value;
-  }
-
-  public static GeographyVal fromBytes(byte[] bytes) {
-    if (bytes == null) {
-      return null;
-    } else {
-      return new GeographyVal(bytes);
-    }
-  }
-
-  // Comparison is not yet supported for GEOGRAPHY.
-  public int compareTo(GeographyVal g) {
-    throw new UnsupportedOperationException();
-  }
-}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java
deleted file mode 100644
index 381d3e25c68af..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import org.apache.spark.annotation.Unstable;
-
-import java.io.Serializable;
-
-// This class represents the physical type for the GEOMETRY data type.
-@Unstable
-public final class GeometryVal implements Comparable<GeometryVal>, Serializable {
-
-  // The GEOMETRY type is implemented as a byte array. We provide `getBytes` and `fromBytes`
-  // methods for readers and writers to access this underlying array of bytes.
-  private final byte[] value;
-
-  // We make the constructor private. We should use `fromBytes` to create new instances.
-  private GeometryVal(byte[] value) {
-    this.value = value;
-  }
-
-  public byte[] getBytes() {
-    return value;
-  }
-
-  public static GeometryVal fromBytes(byte[] bytes) {
-    if (bytes == null) {
-      return null;
-    } else {
-      return new GeometryVal(bytes);
-    }
-  }
-
-  // Comparison is not yet supported for GEOMETRY.
-  public int compareTo(GeometryVal g) {
-    throw new UnsupportedOperationException();
-  }
-}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/BinaryViewSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/BinaryViewSuite.java
new file mode 100644
index 0000000000000..1e1216ffec76d
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/BinaryViewSuite.java
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.nio.ByteBuffer;
+
+import org.junit.jupiter.api.Test;
+
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.memory.MemoryAllocator;
+import org.apache.spark.unsafe.memory.MemoryBlock;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class BinaryViewSuite {
+
+  private static final byte[] DATA = new byte[] { 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 };
+
+  @Test
+  public void nullFactoriesReturnNull() {
+    assertNull(BinaryView.fromBytes(null));
+    assertNull(BinaryView.fromBytes(null, 0, 0));
+  }
+
+  @Test
+  public void onHeapFromBytesReferencesBackingArray() {
+    BinaryView v = BinaryView.fromBytes(DATA);
+    assertEquals(DATA.length, v.numBytes());
+    assertFalse(v.isOffHeap());
+    assertSame(DATA, v.getBaseObject());
+    // getBytes() must return the backing array when the view owns a tight, on-heap byte[].
+    assertSame(DATA, v.getBytes());
+  }
+
+  @Test
+  public void sliceOfHeapBytes() {
+    BinaryView full = BinaryView.fromBytes(DATA);
+    BinaryView mid = full.slice(2, 4);
+    assertEquals(4, mid.numBytes());
+    // Slice shares the backing array but is not a tight owner, so getBytes() must copy.
+    assertArrayEquals(new byte[] { 30, 40, 50, 60 }, mid.getBytes());
+    assertNotSame(DATA, mid.getBytes());
+    // Range reads use slice-relative coordinates.
+    assertEquals(30, mid.getByte(0));
+    assertEquals(60, mid.getByte(3));
+  }
+
+  @Test
+  public void hasTightOnHeapArray() {
+    // A view that owns the whole array is a tight on-heap owner.
+    assertTrue(BinaryView.fromBytes(DATA).hasTightOnHeapArray());
+    // A sub-range view is not, even when on-heap.
+    assertFalse(BinaryView.fromBytes(DATA, 2, 4).hasTightOnHeapArray());
+    assertFalse(BinaryView.fromBytes(DATA).slice(0, DATA.length - 1).hasTightOnHeapArray());
+    // copy() always produces a tight on-heap owner.
+    assertTrue(BinaryView.fromBytes(DATA, 2, 4).copy().hasTightOnHeapArray());
+
+    MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length);
+    try {
+      Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET,
+        null, block.getBaseOffset(), DATA.length);
+      BinaryView offHeap = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length);
+      // An off-heap view is never a tight on-heap owner.
+      assertFalse(offHeap.hasTightOnHeapArray());
+      assertTrue(offHeap.copy().hasTightOnHeapArray());
+    } finally {
+      MemoryAllocator.UNSAFE.free(block);
+    }
+  }
+
+  @Test
+  public void primitiveReaders() {
+    byte[] bytes = new byte[16];
+    Platform.putInt(bytes, Platform.BYTE_ARRAY_OFFSET, 0xCAFEBABE);
+    Platform.putLong(bytes, Platform.BYTE_ARRAY_OFFSET + 8, 0x1234567890ABCDEFL);
+    BinaryView v = BinaryView.fromBytes(bytes);
+    assertEquals(0xCAFEBABE, v.getInt(0));
+    assertEquals(0x1234567890ABCDEFL, v.getLong(8));
+  }
+
+  @Test
+  public void copyIsIndependent() {
+    byte[] bytes = DATA.clone();
+    BinaryView v = BinaryView.fromBytes(bytes);
+    BinaryView c = v.copy();
+    assertNotSame(v.getBaseObject(), c.getBaseObject());
+    assertArrayEquals(DATA, c.getBytes());
+    // Mutating the source must not affect the copy.
+    bytes[0] = 99;
+    assertEquals(10, c.getBytes()[0]);
+  }
+
+  @Test
+  public void offHeapView() {
+    MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length);
+    try {
+      Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET,
+        null, block.getBaseOffset(), DATA.length);
+      BinaryView v = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length);
+      assertTrue(v.isOffHeap());
+      assertNull(v.getBaseObject());
+      assertEquals(DATA.length, v.numBytes());
+      // getBytes() on an off-heap view must materialize a new array.
+      byte[] materialized = v.getBytes();
+      assertArrayEquals(DATA, materialized);
+      // copy() materializes to an on-heap, tight owner.
+      BinaryView c = v.copy();
+      assertFalse(c.isOffHeap());
+      assertArrayEquals(DATA, c.getBytes());
+    } finally {
+      MemoryAllocator.UNSAFE.free(block);
+    }
+  }
+
+  @Test
+  public void equalsAcrossHeapAndOffHeap() {
+    MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length);
+    try {
+      Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET,
+        null, block.getBaseOffset(), DATA.length);
+      BinaryView heap = BinaryView.fromBytes(DATA);
+      BinaryView offHeap = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length);
+      assertEquals(heap, offHeap);
+      assertEquals(heap.hashCode(), offHeap.hashCode());
+    } finally {
+      MemoryAllocator.UNSAFE.free(block);
+    }
+  }
+
+  @Test
+  public void compareToIsUnsignedLexicographic() {
+    BinaryView a = BinaryView.fromBytes(new byte[] { 1, 2, 3 });
+    BinaryView b = BinaryView.fromBytes(new byte[] { 1, 2, 4 });
+    BinaryView c = BinaryView.fromBytes(new byte[] { 1, 2, 3, 0 });
+    BinaryView neg = BinaryView.fromBytes(new byte[] { (byte) 0x80 });
+    BinaryView pos = BinaryView.fromBytes(new byte[] { 0x7F });
+    assertTrue(a.compareTo(b) < 0);
+    assertTrue(b.compareTo(a) > 0);
+    assertTrue(a.compareTo(c) < 0); // shorter prefix is less
+    assertEquals(0, a.compareTo(BinaryView.fromBytes(new byte[] { 1, 2, 3 })));
+    // Unsigned byte comparison: 0x80 > 0x7F.
+    assertTrue(neg.compareTo(pos) > 0);
+  }
+
+  @Test
+  public void byteBufferRoundTripHeap() {
+    BinaryView v = BinaryView.fromBytes(DATA);
+    ByteBuffer bb = v.toByteBuffer();
+    assertTrue(bb.hasArray());
+    assertEquals(DATA.length, bb.remaining());
+    byte[] out = new byte[DATA.length];
+    bb.get(out);
+    assertArrayEquals(DATA, out);
+  }
+
+  @Test
+  public void byteBufferOffHeapMaterializes() {
+    MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length);
+    try {
+      Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET,
+        null, block.getBaseOffset(), DATA.length);
+      BinaryView v = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length);
+      ByteBuffer bb = v.toByteBuffer();
+      // For off-heap, toByteBuffer materializes into a fresh on-heap array.
+      assertTrue(bb.hasArray());
+      byte[] out = new byte[DATA.length];
+      bb.get(out);
+      assertArrayEquals(DATA, out);
+    } finally {
+      MemoryAllocator.UNSAFE.free(block);
+    }
+  }
+
+  @Test
+  public void writeToMemoryRoundTrip() {
+    BinaryView v = BinaryView.fromBytes(DATA);
+    byte[] target = new byte[DATA.length + 4];
+    v.writeToMemory(target, Platform.BYTE_ARRAY_OFFSET + 2);
+    for (int i = 0; i < DATA.length; i++) {
+      assertEquals(DATA[i], target[i + 2]);
+    }
+  }
+
+  @Test
+  public void javaSerializationRoundTrip() throws Exception {
+    // Serialize a view that points at a sub-range of a larger array; deserialized value
+    // must own a tight backing array containing only the visible bytes.
+    BinaryView v = BinaryView.fromBytes(DATA, 2, 4);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    try (ObjectOutputStream out = new ObjectOutputStream(baos)) {
+      out.writeObject(v);
+    }
+    BinaryView read;
+    try (ObjectInputStream in =
+        new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
+      read = (BinaryView) in.readObject();
+    }
+    assertEquals(4, read.numBytes());
+    assertArrayEquals(new byte[] { 30, 40, 50, 60 }, read.getBytes());
+    assertFalse(read.isOffHeap());
+    assertEquals(v, read);
+  }
+
+  @Test
+  public void kryoSerializationRoundTrip() {
+    Kryo kryo = new Kryo();
+    kryo.register(BinaryView.class);
+    BinaryView v = BinaryView.fromBytes(DATA, 3, 5);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    try (Output out = new Output(baos)) {
+      kryo.writeObject(out, v);
+    }
+    BinaryView read;
+    try (Input in = new Input(new ByteArrayInputStream(baos.toByteArray()))) {
+      read = kryo.readObject(in, BinaryView.class);
+    }
+    assertEquals(5, read.numBytes());
+    assertArrayEquals(new byte[] { 40, 50, 60, 70, 80 }, read.getBytes());
+    assertEquals(v, read);
+  }
+
+}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
deleted file mode 100644
index 639a8b2f77821..0000000000000
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import org.junit.jupiter.api.Test;
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
-
-public class GeographyValSuite {
-
-  @Test
-  public void roundTripBytes() {
-    // A simple byte array to test the round trip (`fromBytes` -> `getBytes`).
-    byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 };
-    GeographyVal geographyVal = GeographyVal.fromBytes(bytes);
-    assertNotNull(geographyVal);
-    assertArrayEquals(bytes, geographyVal.getBytes());
-  }
-
-  @Test
-  public void roundNullHandling() {
-    // A simple null byte array to test null handling for GEOGRAPHY.
-    byte[] bytes = null;
-    GeographyVal geographyVal = GeographyVal.fromBytes(bytes);
-    assertNull(geographyVal);
-  }
-
-  @Test
-  public void testCompareTo() {
-    // Comparison is not yet supported for GEOGRAPHY.
-    byte[] bytes1 = new byte[] { 1, 2, 3 };
-    byte[] bytes2 = new byte[] { 4, 5, 6 };
-    GeographyVal geographyVal1 = GeographyVal.fromBytes(bytes1);
-    GeographyVal geographyVal2 = GeographyVal.fromBytes(bytes2);
-    try {
-      geographyVal1.compareTo(geographyVal2);
-    } catch (UnsupportedOperationException e) {
-      assert(e.toString().equals("java.lang.UnsupportedOperationException"));
-    }
-  }
-}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
deleted file mode 100644
index e38c6903e6ddc..0000000000000
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import org.junit.jupiter.api.Test;
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
-
-public class GeometryValSuite {
-
-  @Test
-  public void roundTripBytes() {
-    // A simple byte array to test the round trip (`fromBytes` -> `getBytes`).
-    byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 };
-    GeometryVal geometryVal = GeometryVal.fromBytes(bytes);
-    assertNotNull(geometryVal);
-    assertArrayEquals(bytes, geometryVal.getBytes());
-  }
-
-  @Test
-  public void roundNullHandling() {
-    // A simple null byte array to test null handling for GEOMETRY.
-    byte[] bytes = null;
-    GeometryVal geometryVal = GeometryVal.fromBytes(bytes);
-    assertNull(geometryVal);
-  }
-
-  @Test
-  public void testCompareTo() {
-    // Comparison is not yet supported for GEOMETRY.
-    byte[] bytes1 = new byte[] { 1, 2, 3 };
-    byte[] bytes2 = new byte[] { 4, 5, 6 };
-    GeometryVal geometryVal1 = GeometryVal.fromBytes(bytes1);
-    GeometryVal geometryVal2 = GeometryVal.fromBytes(bytes2);
-    try {
-      geometryVal1.compareTo(geometryVal2);
-    } catch (UnsupportedOperationException e) {
-      assert(e.toString().equals("java.lang.UnsupportedOperationException"));
-    }
-  }
-}
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
index ddf588b6c64c7..87f1d0a1c75f3 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
@@ -300,6 +300,37 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
     })
   }
 
+  test("test concurrent comparator, sortKeyFunction, and getCollator on ICU collations") {
+    // Thread-local collator instances avoid lock contention on ICU's internal collation buffer.
+    // This test verifies correctness under concurrent access for all three paths:
+    // comparator, sortKeyFunction, and getCollator().
+    val collationNames = Seq("UNICODE", "en", "de", "en_CI", "en_AI")
+    collationNames.foreach { name =>
+      val collation = fetchCollation(name)
+      val s1 = toUTF8("apple")
+      val s2 = toUTF8("banana")
+      val expectedCmp = collation.comparator.compare(s1, s2)
+      val expectedKey = collation.sortKeyFunction.apply(s1).asInstanceOf[Array[Byte]]
+      val expectedCollatorKey =
+        collation.getCollator.getCollationKey(s1.toValidString()).toByteArray
+
+      (0 to 5).foreach(_ => {
+        IntStream.rangeClosed(0, 200).parallel().forEach { _ =>
+          val cmp = collation.comparator.compare(s1, s2)
+          assert(cmp == expectedCmp,
+            s"Comparator returned inconsistent result for $name")
+          val key = collation.sortKeyFunction.apply(s1).asInstanceOf[Array[Byte]]
+          assert(java.util.Arrays.equals(key, expectedKey),
+            s"sortKeyFunction returned inconsistent result for $name")
+          val collatorKey =
+            collation.getCollator.getCollationKey(s1.toValidString()).toByteArray
+          assert(java.util.Arrays.equals(collatorKey, expectedCollatorKey),
+            s"getCollator().getCollationKey() returned inconsistent result for $name")
+        }
+      })
+    }
+  }
+
   test("test collation caching") {
     Seq(
       "UTF8_BINARY",
diff --git a/common/utils-java/pom.xml b/common/utils-java/pom.xml
index cd06b89da9939..433bffd7e405e 100644
--- a/common/utils-java/pom.xml
+++ b/common/utils-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java b/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java
index d8238912aec63..e92ef6f462a3f 100644
--- a/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java
+++ b/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java
@@ -331,6 +331,10 @@ public enum LogKeys implements LogKey {
   LABEL_COLUMN,
   LARGEST_CLUSTER_INDEX,
   LAST_ACCESS_TIME,
+  LAST_ATTEMPT_ACC_INVALIDATE,
+  LAST_ATTEMPT_ACC_SYSTEM_METRIC,
+  LAST_ATTEMPT_ACC_UNEXPECTED_REASON,
+  LAST_ATTEMPT_ACC_USER_METRIC,
   LAST_COMMITTED_CHECKPOINT_ID,
   LAST_COMMIT_BASED_CHECKPOINT_ID,
   LAST_SCAN_TIME,
@@ -452,6 +456,7 @@ public enum LogKeys implements LogKey {
   NUM_BYTES_USED,
   NUM_CATEGORIES,
   NUM_CHECKSUM_FILE,
+  NUM_CHILDREN,
   NUM_CHUNKS,
   NUM_CLASSES,
   NUM_COEFFICIENTS,
diff --git a/common/utils/pom.xml b/common/utils/pom.xml
index be2447b3ec4e4..296c30a6d25f7 100644
--- a/common/utils/pom.xml
+++ b/common/utils/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index 7c5017b97f0db..926019df1e74f 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -191,6 +191,129 @@
     ],
     "sqlState" : "0A000"
   },
+  "AUTOCDC_BOTH_COLUMN_LIST_AND_EXCEPT_COLUMN_LIST" : {
+    "message" : [
+      "AutoCDC flow specifies both `column_list` and `except_column_list`; at most one may be provided."
+    ],
+    "sqlState" : "42613"
+  },
+  "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA" : {
+    "message" : [
+      "Using <caseSensitivity> column name comparison, the following columns are not present in the <schemaName> schema: <missingColumns>. Available columns: <availableColumns>."
+    ],
+    "sqlState" : "42703"
+  },
+  "AUTOCDC_EMPTY_KEYS" : {
+    "message" : [
+      "AutoCDC requires at least one key column to identify rows, but received an empty key set."
+    ],
+    "sqlState" : "22023"
+  },
+  "AUTOCDC_INVALID_STATE" : {
+    "message" : [
+      "AutoCDC flow <flowName> detected an invalid state:"
+    ],
+    "subClass" : {
+      "AUXILIARY_TABLE_KEY_COLUMN_MISSING" : {
+        "message" : [
+          "The auxiliary table <auxTableName> is missing key column <keyColumnName> that is recorded in its <propertyName> table property. The auxiliary table schema may be corrupted or have been modified externally. Perform a full refresh of the target table to recreate the auxiliary table."
+        ]
+      },
+      "AUXILIARY_TABLE_PROPERTY_MALFORMED" : {
+        "message" : [
+          "The auxiliary table <auxTableName> has a malformed <propertyName> property with raw value '<rawValue>'. The property must be a JSON array of strings (e.g. '[\"id\",\"region\"]'). The auxiliary table metadata may be corrupted or have been modified externally. Perform a full refresh of the target table to recreate the auxiliary table."
+        ]
+      },
+      "AUXILIARY_TABLE_PROPERTY_MISSING" : {
+        "message" : [
+          "The auxiliary table <auxTableName> is missing the required <propertyName> table property; cannot validate AutoCDC key columns. The auxiliary table metadata may be corrupted or have been modified externally. Perform a full refresh of the target table to recreate the auxiliary table."
+        ]
+      },
+      "KEY_SCHEMA_DRIFT" : {
+        "message" : [
+          "The AutoCDC flow's current key columns <expectedKeySchema> do not match the keys recorded in the auxiliary table <auxTableName> (recorded keys <recordedKeySchema>). AutoCDC does not support changing key columns or their types across incremental pipeline runs. To change keys, perform a full refresh of the target table."
+        ]
+      }
+    },
+    "sqlState" : "42000"
+  },
+  "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA" : {
+    "message" : [
+      "Using <caseSensitivity> column name comparison, the AutoCDC key column `<keyColumnName>` is not present in the flow's selected source schema. AutoCDC requires every key column to be present in the source change-data feed and retained by any configured column selection."
+    ],
+    "sqlState" : "22023"
+  },
+  "AUTOCDC_MICROBATCH_VALIDATION" : {
+    "message" : [
+      "AutoCDC flow on table <tableName> in batch <batchId> failed microbatch validation."
+    ],
+    "subClass" : {
+      "NON_ORDERABLE_SEQUENCE" : {
+        "message" : [
+          "The sequencing column has non-orderable type <dataType>. The sequencing column must be of a type that supports ordering."
+        ]
+      },
+      "NULL_KEY" : {
+        "message" : [
+          "The microbatch contains rows with null values in the following key column(s): <nullKeyCounts>. All rows must have non-null values for every key column."
+        ]
+      },
+      "NULL_SEQUENCE" : {
+        "message" : [
+          "The microbatch contains <nullCount> row(s) with a null sequencing value. All rows must have a non-null sequencing value."
+        ]
+      }
+    },
+    "sqlState" : "22000"
+  },
+  "AUTOCDC_MISSING_SEQUENCE_BY" : {
+    "message" : [
+      "AutoCDC flow is missing a required `sequence_by` expression. Specify a `sequence_by` column or expression that orders incoming change events."
+    ],
+    "sqlState" : "22023"
+  },
+  "AUTOCDC_MISSING_SOURCE" : {
+    "message" : [
+      "AutoCDC flow is missing a required `source` table name. Specify the name of the streaming source table the flow should read from."
+    ],
+    "sqlState" : "22023"
+  },
+  "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER" : {
+    "message" : [
+      "Expected a single column identifier; got the multi-part identifier <columnName> (parts: <nameParts>)."
+    ],
+    "sqlState" : "22023"
+  },
+  "AUTOCDC_MULTIPLE_FLOWS_TO_TARGET" : {
+    "message" : [
+      "Invalid AutoCDC destination <tableName> with multiple flows: <flows>. An AutoCDC target table must have exactly one flow writing to it."
+    ],
+    "sqlState" : "42000"
+  },
+  "AUTOCDC_NON_COLUMN_IDENTIFIER" : {
+    "message" : [
+      "Expected a column identifier; got the non-attribute expression `<expression>`. AutoCDC keys, sequence_by, column_list, and except_column_list must reference unqualified column names."
+    ],
+    "sqlState" : "22023"
+  },
+  "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT" : {
+    "message" : [
+      "The column `<columnName>` in the <schemaName> schema collides with the reserved AutoCDC column name prefix `<reservedColumnNamePrefix>` (using <caseSensitivity> column name comparison). Rename or remove the column."
+    ],
+    "sqlState" : "42710"
+  },
+  "AUTOCDC_SCD2_NOT_SUPPORTED" : {
+    "message" : [
+      "AutoCDC flows do not currently support SCD Type 2 transformations."
+    ],
+    "sqlState" : "0A000"
+  },
+  "AUTOCDC_TARGET_DOES_NOT_SUPPORT_MERGE" : {
+    "message" : [
+      "Cannot start AutoCDC flow: the target table <tableName> (format: <format>) does not support row-level operations. AutoCDC requires a target backed by a connector that supports MERGE."
+    ],
+    "sqlState" : "0A000"
+  },
   "AVRO_CANNOT_WRITE_NULL_FIELD" : {
     "message" : [
       "Cannot write null value for field <name> defined as non-null Avro data type <dataType>.",
@@ -661,6 +784,29 @@
     ],
     "sqlState" : "42P08"
   },
+  "CHANGELOG_CONTRACT_VIOLATION" : {
+    "message" : [
+      "The Change Data Capture (CDC) connector violated the `Changelog` contract at runtime."
+    ],
+    "subClass" : {
+      "NULL_COMMIT_TIMESTAMP" : {
+        "message" : [
+          "Connector emitted a row with a NULL `_commit_timestamp` on a streaming read engaging post-processing. The `Changelog` contract requires `_commit_timestamp` to be non-NULL for streaming reads, since post-processing uses it as event time to advance the watermark."
+        ]
+      },
+      "UNEXPECTED_CHANGE_TYPE" : {
+        "message" : [
+          "Connector emitted a row with a `_change_type` value that is not one of the four supported types (`insert`, `delete`, `update_preimage`, `update_postimage`). The `Changelog` contract requires every emitted row to carry one of these four values."
+        ]
+      },
+      "UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION" : {
+        "message" : [
+          "Connector emitted multiple delete or insert rows for the same `(rowId, _commit_version)` partition. The `Changelog` contract requires at most one logical change per row identity per commit when `containsIntermediateChanges() = false`. Either fix the connector to deduplicate intermediate states, or set `containsIntermediateChanges() = true` and use `deduplicationMode = netChanges`."
+        ]
+      }
+    },
+    "sqlState" : "XX000"
+  },
   "CHECKPOINT_FILE_CHECKSUM_VERIFICATION_FAILED" : {
     "message" : [
       "Checksum verification failed, the file may be corrupted. File: <fileName>",
@@ -2785,6 +2931,12 @@
           "<errors>"
         ]
       },
+      "COLUMN_ID_MISMATCH" : {
+        "message" : [
+          "Column IDs have changed:",
+          "<errors>"
+        ]
+      },
       "METADATA_COLUMNS_MISMATCH" : {
         "message" : [
           "Metadata columns have changed:",
@@ -2968,6 +3120,13 @@
     ],
     "sqlState" : "0A000"
   },
+  "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED" : {
+    "message" : [
+      "Table alias is not allowed with INSERT INTO ... REPLACE WHERE because the WHERE condition is evaluated against the target table directly.",
+      "Use INSERT INTO ... REPLACE ON if you need to reference the target table via an alias."
+    ],
+    "sqlState" : "42000"
+  },
   "INSUFFICIENT_TABLE_PROPERTY" : {
     "message" : [
       "Can't find table property:"
@@ -3278,6 +3437,34 @@
         "message" : [
           "`startingVersion` is required when `endingVersion` is specified for CDC queries."
         ]
+      },
+      "UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL" : {
+        "message" : [
+          "`computeUpdates` cannot be used with `deduplicationMode=none` on connector `<changelogName>` because the connector emits copy-on-write carry-over pairs (`containsCarryoverRows()` returns true) that would be silently mislabeled as updates. Set `deduplicationMode` to `dropCarryovers` or `netChanges`."
+        ]
+      }
+    },
+    "sqlState" : "42K03"
+  },
+  "INVALID_CHANGELOG_SCHEMA" : {
+    "message" : [
+      "The Change Data Capture (CDC) schema returned by connector <changelogName> is invalid."
+    ],
+    "subClass" : {
+      "INVALID_COLUMN_TYPE" : {
+        "message" : [
+          "Column `<columnName>` has type <actualType>, expected <expectedType>."
+        ]
+      },
+      "MISSING_COLUMN" : {
+        "message" : [
+          "Required column `<columnName>` is missing."
+        ]
+      },
+      "MISSING_ROW_ID" : {
+        "message" : [
+          "Connector advertises one or more post-processing properties (`containsCarryoverRows`, `representsUpdateAsDeleteAndInsert`, `containsIntermediateChanges`) that require row identity, but `Changelog.rowId()` returned an empty array."
+        ]
       }
     },
     "sqlState" : "42K03"
@@ -3587,6 +3774,11 @@
       "Flow <flowIdentifier> returns an invalid relation type."
     ],
     "subClass" : {
+      "AUTOCDC_RELATION_FOR_TEMPORARY_VIEW" : {
+        "message" : [
+          "AutoCDC flows must target a streaming table because their reconciliation semantics require a streaming-table sink, but the flow <flowIdentifier> attempts to write an AutoCDC relation to the temporary view <viewIdentifier>."
+        ]
+      },
       "BATCH_RELATION_FOR_STREAMING_TABLE" : {
         "message" : [
           "Streaming tables may only be defined by streaming relations, but the flow <flowIdentifier> attempts to write a batch relation to the streaming table <tableIdentifier>. Consider using the STREAM operator in Spark-SQL to convert the batch relation into a streaming relation, or populating the streaming table with an append once-flow instead."
@@ -4066,6 +4258,12 @@
     },
     "sqlState" : "KD002"
   },
+  "INVALID_METRIC_VIEW_YAML" : {
+    "message" : [
+      "Failed to parse metric view YAML: <message>"
+    ],
+    "sqlState" : "42K0L"
+  },
   "INVALID_NAME_IN_USE_COMMAND" : {
     "message" : [
       "Invalid name '<name>' in <command> command. Reason: <reason>"
@@ -5262,6 +5460,49 @@
     ],
     "sqlState" : "0A000"
   },
+  "NEAREST_BY_JOIN" : {
+    "message" : [
+      "Invalid nearest-by join."
+    ],
+    "subClass" : {
+      "CROSS_JOIN_NOT_ENABLED" : {
+        "message" : [
+          "Nearest-by join is implemented as a bounded cross-product internally and is therefore rejected when `spark.sql.crossJoin.enabled = false`. Set `spark.sql.crossJoin.enabled = true` to permit it, or rewrite the query without nearest-by."
+        ]
+      },
+      "NON_ORDERABLE_RANKING_EXPRESSION" : {
+        "message" : [
+          "The ranking expression <expression> of type <type> is not orderable. Provide an expression that returns an orderable type, such as a numeric distance like abs(a.col - b.col) or a numeric similarity score."
+        ]
+      },
+      "NUM_RESULTS_OUT_OF_RANGE" : {
+        "message" : [
+          "The number of results <numResults> must be between <min> and <max>. Update the literal in `APPROX NEAREST <numResults> BY ...` (or `EXACT NEAREST <numResults> BY ...`) to fall within that range."
+        ]
+      },
+      "STREAMING_NOT_SUPPORTED" : {
+        "message" : [
+          "Nearest-by join is not supported with streaming DataFrames/Datasets."
+        ]
+      },
+      "UNSUPPORTED_DIRECTION" : {
+        "message" : [
+          "Unsupported nearest-by join direction '<direction>'. Supported nearest-by join directions include: <supported>."
+        ]
+      },
+      "UNSUPPORTED_JOIN_TYPE" : {
+        "message" : [
+          "Unsupported nearest-by join type <joinType>. Supported types: <supported>."
+        ]
+      },
+      "UNSUPPORTED_MODE" : {
+        "message" : [
+          "Unsupported nearest-by join mode '<mode>'. Supported modes include: <supported>."
+        ]
+      }
+    },
+    "sqlState" : "42604"
+  },
   "NEGATIVE_SCALE_DISALLOWED" : {
     "message" : [
       "Negative scale is not allowed: '<scale>'. Set the config <sqlConf> to \"true\" to allow it."
@@ -5922,6 +6163,12 @@
     ],
     "sqlState" : "42836"
   },
+  "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE" : {
+    "message" : [
+      "Recursive file loading is not supported when the data source has explicit partition columns. Either remove the option \"recursiveFileLookup\", or read the data without supplying partition columns (for example, do not read a partitioned table)."
+    ],
+    "sqlState" : "0A000"
+  },
   "RECURSIVE_PROTOBUF_SCHEMA" : {
     "message" : [
       "Found recursive reference in Protobuf schema, which can not be processed by Spark by default: <fieldDescriptor>. try setting the option `recursive.fields.max.depth` 1 to 10. Going beyond 10 levels of recursion is not allowed."
@@ -6798,6 +7045,11 @@
           "Duplicate streaming source names detected: <names>. Each streaming source must have a unique name."
         ]
       },
+      "INVALID_SINK_NAME" : {
+        "message" : [
+          "Invalid streaming sink name: '<sinkName>'. Sink names must only contain ASCII letters ('a'-'z', 'A'-'Z'), digits ('0'-'9'), and underscores ('_')."
+        ]
+      },
       "INVALID_SOURCE_NAME" : {
         "message" : [
           "Invalid streaming source name '<sourceName>'. Source names must only contain ASCII letters (a-z, A-Z), digits (0-9), and underscores (_)."
@@ -6808,6 +7060,11 @@
           "Streaming source naming is not supported. Source name '<name>' was provided but the feature is disabled. Please enable the feature by setting spark.sql.streaming.queryEvolution.enableSourceEvolution to true."
         ]
       },
+      "UNNAMED_STREAMING_SINKS_WITH_ENFORCEMENT" : {
+        "message" : [
+          "Streaming sink must be named when spark.sql.streaming.queryEvolution.enableSinkEvolution is enabled. Use the name() method on DataStreamWriter to assign a name to the streaming sink."
+        ]
+      },
       "UNNAMED_STREAMING_SOURCES_WITH_ENFORCEMENT" : {
         "message" : [
           "All streaming sources must be named when spark.sql.streaming.queryEvolution.enableSourceEvolution is enabled. Unnamed sources found: <sourceInfo>. Use the name() method to assign names to all streaming sources."
@@ -6921,6 +7178,12 @@
     ],
     "sqlState" : "22023"
   },
+  "ST_INVALID_ENDIANNESS_VALUE" : {
+    "message" : [
+      "Endianness '<endianness>' must be either 'NDR' (little-endian) or 'XDR' (big-endian)."
+    ],
+    "sqlState" : "22023"
+  },
   "ST_INVALID_SRID_VALUE" : {
     "message" : [
       "Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
@@ -7786,6 +8049,11 @@
           "Referencing a lateral column alias <lca> in window expression <windowExpr>."
         ]
       },
+      "LATERAL_JOIN_NEAREST_BY" : {
+        "message" : [
+          "LATERAL correlation with NEAREST BY clause."
+        ]
+      },
       "LATERAL_JOIN_USING" : {
         "message" : [
           "JOIN USING with LATERAL correlation."
@@ -7973,6 +8241,26 @@
           "Store backend <stateStoreProvider> is not supported by TransformWithState operator. Please use RocksDBStateStoreProvider."
         ]
       },
+      "TABLESAMPLE_SYSTEM" : {
+        "message" : [
+          "TABLESAMPLE SYSTEM is only supported by data sources that implement block-level sampling."
+        ]
+      },
+      "TABLESAMPLE_SYSTEM_NO_SCAN" : {
+        "message" : [
+          "TABLESAMPLE SYSTEM requires a direct reference to a data source table that supports block-level sampling. It cannot be applied to subqueries, views, or tables with intervening operations."
+        ]
+      },
+      "TABLESAMPLE_SYSTEM_REPEATABLE" : {
+        "message" : [
+          "TABLESAMPLE SYSTEM does not support the REPEATABLE clause. Use TABLESAMPLE BERNOULLI for repeatable sampling with a seed."
+        ]
+      },
+      "TABLESAMPLE_SYSTEM_SAMPLE_METHOD" : {
+        "message" : [
+          "TABLESAMPLE SYSTEM does not support <sampleMethod> sampling. Only PERCENT sampling is supported."
+        ]
+      },
       "TABLE_OPERATION" : {
         "message" : [
           "Table <tableName> does not support <operation>. Please check the current catalog and namespace to make sure the qualified table name is expected, and also check the catalog implementation which is configured by \"spark.sql.catalog\"."
@@ -8171,6 +8459,29 @@
     },
     "sqlState" : "0A000"
   },
+  "UNSUPPORTED_SCHEMA_EVOLUTION" : {
+    "message" : [
+      "Schema evolution is not supported for this write."
+    ],
+    "subClass" : {
+      "CREATE_TABLE" : {
+        "message" : [
+          "Creating a new table does not support schema evolution."
+        ]
+      },
+      "REPLACE_TABLE" : {
+        "message" : [
+          "Replacing a table does not support schema evolution."
+        ]
+      },
+      "V1_TABLE" : {
+        "message" : [
+          "Writes to V1 tables or V1 data sources do not support schema evolution."
+        ]
+      }
+    },
+    "sqlState" : "0A000"
+  },
   "UNSUPPORTED_SHOW_CREATE_TABLE" : {
     "message" : [
       "Unsupported a SHOW CREATE TABLE command."
@@ -8181,6 +8492,11 @@
           "The table <tableName> is a Spark data source table. Please use SHOW CREATE TABLE without AS SERDE instead."
         ]
       },
+      "ON_METRIC_VIEW" : {
+        "message" : [
+          "The command is not supported on a metric view <tableName>."
+        ]
+      },
       "ON_TEMPORARY_VIEW" : {
         "message" : [
           "The command is not supported on a temporary view <tableName>."
diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
index 810bdabebb38a..3fac57dbe5dda 100644
--- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -95,6 +95,11 @@ class LogEntry(messageWithContext: => MessageWithContext) {
   def message: String = cachedMessageWithContext.message
 
   def context: java.util.Map[String, String] = cachedMessageWithContext.context
+
+  def +(other: LogEntry): LogEntry = {
+    val combined = cachedMessageWithContext + other.cachedMessageWithContext
+    new LogEntry(combined)
+  }
 }
 
 /**
diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala
index fa5f99a1aae25..0af0e0f6de457 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala
@@ -53,8 +53,8 @@ private[spark] trait SparkTestUtils {
       Seq(
         "-classpath",
         classpathUrls
-          .map {
-            _.getFile
+          .map { u =>
+            new File(u.toURI).getPath
           }
           .mkString(File.pathSeparator))
     } else {
@@ -123,7 +123,8 @@ private[spark] trait SparkTestUtils {
 
     val options = Seq("-d", classDir.getAbsolutePath) ++ (
       if (classpathUrls.nonEmpty) {
-        Seq("-classpath", classpathUrls.map(_.getFile).mkString(File.pathSeparator))
+        Seq("-classpath",
+          classpathUrls.map(u => new File(u.toURI).getPath).mkString(File.pathSeparator))
       } else Seq.empty
     )
 
@@ -177,7 +178,7 @@ private[spark] trait SparkTestUtils {
     // on Windows to work around CMD's command-line length limit and by some build/CI
     // tools. Expand any such JARs before invoking scalac so the classpath is complete.
     val expandedClasspath = classpathUrls.flatMap(expandManifestClasspath)
-    val cpStr = expandedClasspath.map(_.getFile).mkString(File.pathSeparator)
+    val cpStr = expandedClasspath.map(u => new File(u.toURI).getPath).mkString(File.pathSeparator)
     val args = Array("-classpath", cpStr, "-d", classDir.getAbsolutePath) ++
       sourceFiles.map(_.getAbsolutePath)
 
@@ -216,7 +217,7 @@ private[spark] trait SparkTestUtils {
    * original URL unchanged.
    */
   private[spark] def expandManifestClasspath(url: URL): Seq[URL] = {
-    val file = new File(url.getFile)
+    val file = new File(url.toURI)
     if (!file.exists() || !file.getName.endsWith(".jar")) return Seq(url)
     try {
       val jarFile = new JarFile(file)
diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala
index 8b2807a80dd10..791be198a111c 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala
@@ -22,7 +22,7 @@ import scala.concurrent.Awaitable
 import scala.concurrent.duration.Duration
 import scala.util.control.NonFatal
 
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkThrowable}
 
 private[spark] object SparkThreadUtils {
   // scalastyle:off awaitresult
@@ -41,6 +41,14 @@ private[spark] object SparkThreadUtils {
    */
   @throws(classOf[SparkException])
   def awaitResult[T](awaitable: Awaitable[T], atMost: Duration): T = {
+    awaitResult(awaitable, atMost, preserveSparkThrowable = false)
+  }
+
+  @throws(classOf[SparkException])
+  def awaitResult[T](
+      awaitable: Awaitable[T],
+      atMost: Duration,
+      preserveSparkThrowable: Boolean): T = {
     try {
       awaitResultNoSparkExceptionConversion(awaitable, atMost)
     } catch {
@@ -48,6 +56,15 @@ private[spark] object SparkThreadUtils {
         throw e.throwable
       // TimeoutException is thrown in the current thread, so not need to warp
       // the exception.
+      // Re-throw exceptions that already carry a structured condition (SparkThrowable)
+      // to avoid wrapping them in a generic SparkException and losing the SQL state.
+      case st: Exception with SparkThrowable
+          if preserveSparkThrowable
+            && !st.isInstanceOf[TimeoutException] && st.getCondition != null =>
+        // Attach the caller's stack trace so it's not lost when re-throwing from a worker thread.
+        st.addSuppressed(
+          new SparkException("Exception thrown in awaitResult", cause = null))
+        throw st
       case NonFatal(t)
         if !t.isInstanceOf[TimeoutException] =>
         throw new SparkException("Exception thrown in awaitResult: ", t)
diff --git a/common/utils/src/test/scala/org/apache/spark/util/SparkTestUtilsSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/SparkTestUtilsSuite.scala
new file mode 100644
index 0000000000000..10a599739f6cf
--- /dev/null
+++ b/common/utils/src/test/scala/org/apache/spark/util/SparkTestUtilsSuite.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
+
+class SparkTestUtilsSuite extends AnyFunSuite with SparkTestUtils { // scalastyle:ignore funsuite
+
+  test("SPARK-57081: createCompiledClass with spaces in classpath") {
+    val dir = SparkFileUtils.createTempDir(namePrefix = "path with spaces")
+    val sourceFile = new JavaSourceFromString("Hello", "public class Hello {}")
+    val result = createCompiledClass("Hello", dir, sourceFile, Seq(dir.toURI.toURL))
+    assert(result.exists(), s"Compiled class file should exist at ${result.getPath}")
+  }
+}
diff --git a/common/variant/pom.xml b/common/variant/pom.xml
index a2bc7040e23c9..2ddd78eb7f17d 100644
--- a/common/variant/pom.xml
+++ b/common/variant/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java
index 1bd008a5c9149..aaf6f72bd5364 100644
--- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java
+++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java
@@ -43,7 +43,12 @@
  */
 public class VariantBuilder {
   public VariantBuilder(boolean allowDuplicateKeys) {
+    this(allowDuplicateKeys, true);
+  }
+
+  public VariantBuilder(boolean allowDuplicateKeys, boolean validateUnicodeInJsonParsing) {
     this.allowDuplicateKeys = allowDuplicateKeys;
+    this.validateUnicodeInJsonParsing = validateUnicodeInJsonParsing;
   }
 
   /**
@@ -53,18 +58,41 @@ public VariantBuilder(boolean allowDuplicateKeys) {
    * @throws IOException if any JSON parsing error happens.
    */
   public static Variant parseJson(String json, boolean allowDuplicateKeys) throws IOException {
+    return parseJson(json, allowDuplicateKeys, true);
+  }
+
+  /**
+   * Similar to {@link #parseJson(String, boolean)}, but additionally controls whether JSON
+   * string contents are validated to be well-formed Unicode (no unpaired UTF-16 surrogate code
+   * units). Strict validation is the default and matches RFC 8259 section 7. The flag exists
+   * to allow callers to opt out for backward compatibility with input that previously parsed
+   * (with the unpaired surrogate silently replaced by the Unicode replacement character).
+   */
+  public static Variant parseJson(String json, boolean allowDuplicateKeys,
+      boolean validateUnicodeInJsonParsing) throws IOException {
     try (JsonParser parser = new JsonFactory().createParser(json)) {
       parser.nextToken();
-      return parseJson(parser, allowDuplicateKeys);
+      return parseJson(parser, allowDuplicateKeys, validateUnicodeInJsonParsing);
     }
   }
 
   /**
-   * Similar {@link #parseJson(String, boolean)}, but takes a JSON parser instead of string input.
+   * Similar to {@link #parseJson(String, boolean)}, but takes a JSON parser instead of string
+   * input.
    */
   public static Variant parseJson(JsonParser parser, boolean allowDuplicateKeys)
       throws IOException {
-    VariantBuilder builder = new VariantBuilder(allowDuplicateKeys);
+    return parseJson(parser, allowDuplicateKeys, true);
+  }
+
+  /**
+   * Similar to {@link #parseJson(JsonParser, boolean)}, but additionally controls whether JSON
+   * string contents are validated to be well-formed Unicode. See
+   * {@link #parseJson(String, boolean, boolean)}.
+   */
+  public static Variant parseJson(JsonParser parser, boolean allowDuplicateKeys,
+      boolean validateUnicodeInJsonParsing) throws IOException {
+    VariantBuilder builder = new VariantBuilder(allowDuplicateKeys, validateUnicodeInJsonParsing);
     builder.buildJson(parser);
     return builder.result();
   }
@@ -495,6 +523,9 @@ private void buildJson(JsonParser parser) throws IOException {
         int start = writePos;
         while (parser.nextToken() != JsonToken.END_OBJECT) {
           String key = parser.currentName();
+          if (validateUnicodeInJsonParsing) {
+            checkValidUnicodeString(key, parser);
+          }
           parser.nextToken();
           int id = addKey(key);
           fields.add(new FieldEntry(key, id, writePos - start));
@@ -513,9 +544,14 @@ private void buildJson(JsonParser parser) throws IOException {
         finishWritingArray(start, offsets);
         break;
       }
-      case VALUE_STRING:
-        appendString(parser.getText());
+      case VALUE_STRING: {
+        String text = parser.getText();
+        if (validateUnicodeInJsonParsing) {
+          checkValidUnicodeString(text, parser);
+        }
+        appendString(text);
         break;
+      }
       case VALUE_NUMBER_INT:
         try {
           appendLong(parser.getLongValue());
@@ -557,6 +593,30 @@ private void parseFloatingPoint(JsonParser parser) throws IOException {
     }
   }
 
+  // Reject JSON strings that contain unpaired UTF-16 surrogate code units. Java strings can
+  // hold lone surrogates, but RFC 8259 section 7 requires JSON string contents to be well-formed
+  // Unicode. Stricter parsers such as simdjson reject these inputs, while Jackson's
+  // `ReaderBasedJsonParser` accepts them and silently replaces the invalid character with U+FFFD
+  // when the result is encoded as UTF-8. That silent replacement causes data corruption, so
+  // we surface a JSON parse error instead.
+  private static void checkValidUnicodeString(String str, JsonParser parser)
+      throws JsonParseException {
+    int len = str.length();
+    for (int i = 0; i < len; ++i) {
+      char c = str.charAt(i);
+      if (Character.isHighSurrogate(c)) {
+        if (i + 1 >= len || !Character.isLowSurrogate(str.charAt(i + 1))) {
+          throw new JsonParseException(parser, String.format(
+              "Invalid Unicode in JSON string: lone high surrogate U+%04X", (int) c));
+        }
+        ++i;
+      } else if (Character.isLowSurrogate(c)) {
+        throw new JsonParseException(parser, String.format(
+            "Invalid Unicode in JSON string: lone low surrogate U+%04X", (int) c));
+      }
+    }
+  }
+
   // Try to parse a JSON number as a decimal. Return whether the parsing succeeds. The input must
   // only use the decimal format (an integer value with an optional '.' in it) and must not use
   // scientific notation. It also must fit into the precision limitation of decimal types.
@@ -583,4 +643,8 @@ private boolean tryParseDecimal(String input) {
   // Store all keys in `dictionary` in the order of id.
   private final ArrayList<byte[]> dictionaryKeys = new ArrayList<>();
   private final boolean allowDuplicateKeys;
+  // When true, JSON string contents are validated to be well-formed Unicode (RFC 8259 sec 7).
+  // Unpaired UTF-16 surrogate code units cause a `JsonParseException` to be thrown during
+  // `buildJson`, which surfaces as a `MALFORMED_RECORD_IN_PARSING` error to SQL callers.
+  private final boolean validateUnicodeInJsonParsing;
 }
diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java
index 795d46ec2062b..ac93246991c0e 100644
--- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java
+++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java
@@ -428,7 +428,8 @@ private static void checkDecimal(BigDecimal d, int maxPrecision) {
   // Get a decimal value from variant value `value[pos...]`.
   // Throw `MALFORMED_VARIANT` if the variant is malformed.
   public static BigDecimal getDecimalWithOriginalScale(byte[] value, int pos) {
-    checkIndex(pos, value.length);
+    // Decimal should at least have header + scale.
+    checkIndex(pos + 1, value.length);
     int basicType = value[pos] & BASIC_TYPE_MASK;
     int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK;
     if (basicType != PRIMITIVE) throw unexpectedType(Type.DECIMAL);
@@ -589,6 +590,92 @@ public static <T> T handleArray(byte[] value, int pos, ArrayHandler<T> handler)
     return handler.apply(size, offsetSize, offsetStart, dataStart);
   }
 
+  // Validate whether a variant is well-formed. Returns true if the variant binary is structurally
+  // well-formed (all bounds and type-info checks pass), false if it is malformed.
+  //
+  // This is close to, but not strictly equivalent to, "`toJson` does not throw": this function
+  // does not enforce the `SIZE_LIMIT` check that the `Variant` constructor applies (which throws
+  // `VARIANT_CONSTRUCTOR_SIZE_LIMIT`). The implementation otherwise has the same structure as
+  // `toJson` (see `Variant.toJsonImpl`).
+  //
+  // Implementation note: this `try { ... } catch (SparkRuntimeException e)` is sound only because
+  // every helper invoked by `validateImpl` throws `MALFORMED_VARIANT` /
+  // `UNKNOWN_PRIMITIVE_TYPE_IN_VARIANT` rather than a raw `ArrayIndexOutOfBoundsException` on
+  // malformed input. Preserve that invariant when adding new cases.
+  public static boolean isValidVariant(byte[] value, byte[] metadata) {
+    if (value == null || metadata == null) return false;
+    // Validate the metadata version, similar to the check in the `Variant` constructor.
+    if (metadata.length < 1 || (metadata[0] & VERSION_MASK) != VERSION) return false;
+    try {
+      validateImpl(value, metadata, 0);
+      return true;
+    } catch (SparkRuntimeException e) {
+      return false;
+    }
+  }
+
+  private static void validateImpl(byte[] value, byte[] metadata, int pos) {
+    switch (getType(value, pos)) {
+      case OBJECT:
+        handleObject(value, pos, (size, idSize, offsetSize, idStart, offsetStart, dataStart) -> {
+          for (int i = 0; i < size; ++i) {
+            int id = readUnsigned(value, idStart + idSize * i, idSize);
+            int offset = readUnsigned(value, offsetStart + offsetSize * i, offsetSize);
+            int elementPos = dataStart + offset;
+            getMetadataKey(metadata, id);
+            validateImpl(value, metadata, elementPos);
+          }
+          return null;
+        });
+        break;
+      case ARRAY:
+        handleArray(value, pos, (size, offsetSize, offsetStart, dataStart) -> {
+          for (int i = 0; i < size; ++i) {
+            int offset = readUnsigned(value, offsetStart + offsetSize * i, offsetSize);
+            int elementPos = dataStart + offset;
+            validateImpl(value, metadata, elementPos);
+          }
+          return null;
+        });
+        break;
+      case NULL:
+        break;
+      case BOOLEAN:
+        getBoolean(value, pos);
+        break;
+      case LONG:
+        getLong(value, pos);
+        break;
+      case STRING:
+        getString(value, pos);
+        break;
+      case DOUBLE:
+        getDouble(value, pos);
+        break;
+      case DECIMAL:
+        getDecimal(value, pos);
+        break;
+      case DATE:
+      case TIMESTAMP:
+      case TIMESTAMP_NTZ:
+        getLong(value, pos);
+        break;
+      case FLOAT:
+        getFloat(value, pos);
+        break;
+      case BINARY:
+        getBinary(value, pos);
+        break;
+      case UUID:
+        getUuid(value, pos);
+        break;
+      default:
+        // This is practically unreachable because we handle all possible types. It only
+        // intends to ensure we don't forget adding a new case when adding a new type.
+        throw malformedVariant();
+    }
+  }
+
   // Get a key at `id` in the variant metadata.
   // Throw `MALFORMED_VARIANT` if the variant is malformed. An out-of-bound `id` is also considered
   // a malformed variant because it is read from the corresponding variant value.
diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml
index e4148ce906b83..b4f5cd72f551d 100644
--- a/connector/avro/pom.xml
+++ b/connector/avro/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml
index 92487f11cc165..66022cab7c77b 100644
--- a/connector/docker-integration-tests/pom.xml
+++ b/connector/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala
index faeb39108c4f9..d57d3aa5ea03e 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala
@@ -244,6 +244,8 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT
 
   override def supportsTableSample: Boolean = true
 
+  override def supportsTableSampleSystem: Boolean = true
+
   override def supportsIndex: Boolean = true
 
   override def indexOptions: String = "FILLFACTOR=70"
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
index df5dfdf7deafb..79366189c20db 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
@@ -148,7 +148,7 @@ private[v2] trait V2JDBCTest
       partitionColumn: String)
   val tableNameToPartinioningOptions: Map[String, PartitioningInfo] = Map(
     "employee" -> PartitioningInfo("4", "1", "8", "dept"),
-    // new_table is used in "SPARK-37038: Test TABLESAMPLE" test
+    // new_table is used in "SPARK-37038,SPARK-57040: Test TABLESAMPLE" test
     "new_table" -> PartitioningInfo("4", "1", "20", "col1")
   )
 
@@ -470,6 +470,8 @@ private[v2] trait V2JDBCTest
 
   def supportsTableSample: Boolean = false
 
+  def supportsTableSampleSystem: Boolean = false
+
   test("SPARK-48172: Test CONTAINS") {
     val df1 = spark.sql(
       s"""
@@ -699,9 +701,20 @@ private[v2] trait V2JDBCTest
     assert(rows12(5).getString(0) === "special_character_underscorenot_present")
   }
 
+  test("SPARK-57040: TABLESAMPLE with replacement is not pushed down") {
+    withTable(s"$catalogName.new_table") {
+      sql(s"CREATE TABLE $catalogName.new_table (col1 INT, col2 INT)")
+      spark.range(10).select($"id" * 2, $"id" * 2 + 1).write.insertInto(s"$catalogName.new_table")
+      val df = spark.read.table(s"$catalogName.new_table")
+        .sample(withReplacement = true, fraction = 0.5, seed = 12345)
+      checkSamplePushed(df, false)
+      assert(df.collect().length > 0)
+    }
+  }
+
   val partitioningEnabledTestCase = Seq(true, false)
   gridTest(
-    "SPARK-37038: Test TABLESAMPLE"
+    "SPARK-37038,SPARK-57040: Test TABLESAMPLE"
   )(partitioningEnabledTestCase) { partitioningEnabled =>
     if (supportsTableSample) {
       withTable(s"$catalogName.new_table") {
@@ -789,6 +802,27 @@ private[v2] trait V2JDBCTest
         checkSamplePushed(df8, false)
         checkFilterPushed(df8)
         assert(df8.collect().length < 10)
+
+        // SYSTEM sampling pushdown
+        if (supportsTableSampleSystem) {
+          val df9 = sql(s"SELECT * FROM $catalogName.new_table $tableOptions " +
+            "TABLESAMPLE SYSTEM (50 PERCENT)")
+          checkSamplePushed(df9)
+          if (partitioningEnabled) {
+            multiplePartitionAdditionalCheck(df1, partitionInfo)
+          }
+          assert(df9.collect().length <= 10)
+
+          // SYSTEM sampling + column pruning
+          val df10 = sql(s"SELECT col1 FROM $catalogName.new_table $tableOptions " +
+            "TABLESAMPLE SYSTEM (50 PERCENT)")
+          checkSamplePushed(df10)
+          checkColumnPruned(df10, "col1")
+          if (partitioningEnabled) {
+            multiplePartitionAdditionalCheck(df1, partitionInfo)
+          }
+          assert(df10.collect().length <= 10)
+        }
       }
     }
   }
diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml
index fbea96ec6ce7e..b86c94f3e35af 100644
--- a/connector/kafka-0-10-assembly/pom.xml
+++ b/connector/kafka-0-10-assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml
index b21810d055fc4..4980e94c45776 100644
--- a/connector/kafka-0-10-sql/pom.xml
+++ b/connector/kafka-0-10-sql/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml
index 7c3a7f4bd0fde..5c471db25becb 100644
--- a/connector/kafka-0-10-token-provider/pom.xml
+++ b/connector/kafka-0-10-token-provider/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml
index 7da9bc31cb640..a7b5b06a6ff58 100644
--- a/connector/kafka-0-10/pom.xml
+++ b/connector/kafka-0-10/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml
index 97df4301edffd..c73a0015c416e 100644
--- a/connector/kinesis-asl-assembly/pom.xml
+++ b/connector/kinesis-asl-assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml
index c4708b5489c51..c24bd4886e770 100644
--- a/connector/kinesis-asl/pom.xml
+++ b/connector/kinesis-asl/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml
index 99e06e9bf8029..93572d6d671d3 100644
--- a/connector/profiler/pom.xml
+++ b/connector/profiler/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml
index 4f6e826c7b8ba..e9521f9418c1f 100644
--- a/connector/protobuf/pom.xml
+++ b/connector/protobuf/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml
index 57138b71b06e6..7b18a97cbd9de 100644
--- a/connector/spark-ganglia-lgpl/pom.xml
+++ b/connector/spark-ganglia-lgpl/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/core/pom.xml b/core/pom.xml
index f0ae6a4184ca2..6b228a86f3535 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
index a40dab8a8dab7..2a3678a6b94da 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java
@@ -594,28 +594,8 @@ public UnsafeSorterIterator getSortedIterator() throws IOException {
       logger.info("Merging {} spill files using bounded merge with factor {}",
           MDC.of(LogKeys.NUM_SPILL_WRITERS, spillWriters.size()),
           MDC.of(LogKeys.MERGE_FACTOR, spillMergeFactor));
-
-      // This assignment is not inside synchronized(this), unlike the read in
-      // cleanupResources(). That is safe because all callers of cleanupResources()
-      // (the task completion listener, iterator-end cleanup from wrappers like
-      // UnsafeExternalRowSorter / UnsafeKVExternalSorter / SortExec, etc.) run on
-      // the task thread, sequentially with getSortedIterator(). The volatile modifier
-      // on boundedMerger provides memory visibility across any intervening
-      // synchronized blocks.
-      boundedMerger = new UnsafeSorterBoundedSpillMerger(
-          spillMergeFactor,
-          recordComparatorSupplier.get(),
-          prefixComparator,
-          blockManager,
-          serializerManager,
-          fileBufferSizeBytes);
-
-      UnsafeSorterIterator inMemIter = null;
-      if (inMemSorter != null) {
-        readingIterator = new SpillableIterator(inMemSorter.getSortedIterator());
-        inMemIter = readingIterator;
-      }
-      return boundedMerger.merge(spillWriters, inMemIter);
+      BoundedMergerContext ctx = prepareBoundedMerge();
+      return ctx.merger.merge(ctx.snapshot, ctx.inMemIter);
     } else {
       // Original single-round merge: open all spill readers at once
       logger.info("Merging {} spill files in single round",
@@ -633,6 +613,60 @@ public UnsafeSorterIterator getSortedIterator() throws IOException {
     }
   }
 
+  @VisibleForTesting
+  static final class BoundedMergerContext {
+    final List<UnsafeSorterSpillWriter> snapshot;
+    @Nullable final SpillableIterator inMemIter;
+    final UnsafeSorterBoundedSpillMerger merger;
+
+    BoundedMergerContext(
+        List<UnsafeSorterSpillWriter> snapshot,
+        @Nullable SpillableIterator inMemIter,
+        UnsafeSorterBoundedSpillMerger merger) {
+      this.snapshot = snapshot;
+      this.inMemIter = inMemIter;
+      this.merger = merger;
+    }
+  }
+
+  @VisibleForTesting
+  BoundedMergerContext prepareBoundedMerge() {
+    // Snapshot MUST precede readingIterator publication. Once readingIterator is
+    // non-null, a sibling MemoryConsumer's spill request is routed via
+    // readingIterator.spill(), which appends a new writer to spillWriters AND rebinds
+    // readingIterator.upstream to that same file. A post-publication snapshot would
+    // then feed that file to BOTH the snapshot path and readingIterator -- duplicate
+    // records in the merged output. List.copyOf returns an unmodifiable list so any
+    // future code that mutates the snapshot (or aliases the live spillWriters field
+    // into the context and adds to it) fails fast.
+    final List<UnsafeSorterSpillWriter> snapshot = List.copyOf(spillWriters);
+
+    // The volatile fields published below -- boundedMerger and readingIterator -- are
+    // written without holding synchronized(this). Safe because all callers of
+    // getSortedIterator() and cleanupResources() (the task completion listener,
+    // iterator-end cleanup from wrappers like UnsafeExternalRowSorter /
+    // UnsafeKVExternalSorter / SortExec, etc.) run on the task thread, sequentially.
+    // The volatile modifier provides memory visibility to off-task-thread readers:
+    // sibling MemoryConsumer.spill() reads readingIterator, and cleanupResources()'s
+    // synchronized(this) read of boundedMerger crosses any intervening synchronized
+    // blocks.
+    final UnsafeSorterBoundedSpillMerger merger = new UnsafeSorterBoundedSpillMerger(
+        spillMergeFactor,
+        recordComparatorSupplier.get(),
+        prefixComparator,
+        blockManager,
+        serializerManager,
+        fileBufferSizeBytes);
+    boundedMerger = merger;
+
+    SpillableIterator inMemIter = null;
+    if (inMemSorter != null) {
+      readingIterator = new SpillableIterator(inMemSorter.getSortedIterator());
+      inMemIter = readingIterator;
+    }
+    return new BoundedMergerContext(snapshot, inMemIter, merger);
+  }
+
   @VisibleForTesting boolean hasSpaceForAnotherRecord() {
     return inMemSorter.hasSpaceForAnotherRecord();
   }
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java
index 1f389465a8b21..b844f9816bf3c 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java
@@ -90,6 +90,11 @@ final class UnsafeSorterBoundedSpillMerger {
    * <p>If {@code inMemIterator} is non-null, it is included in the final merge round
    * (not spilled to disk in intermediate rounds).</p>
    *
+   * <p>This method does not mutate the input {@code spillWriters} list; intermediate
+   * rounds reassign a local variable to fresh lists. Callers are still responsible for
+   * passing a defensive snapshot if they need to protect against concurrent mutation
+   * of the underlying list (see {@link UnsafeExternalSorter#prepareBoundedMerge}).</p>
+   *
    * @param spillWriters the list of spill writers to merge
    * @param inMemIterator optional in-memory sorted iterator to include in the final merge
    * @return a sorted iterator over all records
@@ -98,7 +103,7 @@ public UnsafeSorterIterator merge(
       List<UnsafeSorterSpillWriter> spillWriters,
       @Nullable UnsafeSorterIterator inMemIterator) throws IOException {
 
-    List<UnsafeSorterSpillWriter> spillsToMerge = new ArrayList<>(spillWriters);
+    List<UnsafeSorterSpillWriter> spillsToMerge = spillWriters;
     int round = 0;
 
     while (spillsToMerge.size() > mergeFactor) {
diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css
index 202579c6b67ce..e7a8f3ab0839a 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css
+++ b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css
@@ -58,4 +58,36 @@ table.dataTable thead .sorting_desc_disabled::after {
 
 div.dataTables_wrapper div.dataTables_length select {
   width: 100%;
+}
+
+/* SQL tab sub-execution disclosure (SPARK-56811) */
+table#sql-table td.sub-exec-toggle {
+  white-space: nowrap;
+}
+
+table#sql-table td.sub-exec-toggle a.toggle-sub-exec {
+  text-decoration: none;
+}
+
+table#sql-table td.sub-exec-toggle a.toggle-sub-exec:hover {
+  text-decoration: underline;
+}
+
+table#sql-table tr.shown td.sub-exec-toggle a.toggle-sub-exec {
+  font-weight: 600;
+}
+
+table#sql-table tr.shown + tr > td {
+  background-color: var(--bs-tertiary-bg, #f4f7fa);
+}
+
+table.sub-exec-table {
+  margin-left: 1.5rem !important;
+  width: calc(100% - 1.5rem) !important;
+  background-color: transparent;
+}
+
+table.sub-exec-table thead th {
+  font-weight: 600;
+  background-color: transparent;
 }
\ No newline at end of file
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index fad9bb522ad92..0262144490ce8 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -3152,6 +3152,8 @@ object SparkContext extends Logging {
   private[spark] val RDD_SCOPE_KEY = "spark.rdd.scope"
   private[spark] val RDD_SCOPE_NO_OVERRIDE_KEY = "spark.rdd.scope.noOverride"
   private[spark] val SQL_EXECUTION_ID_KEY = "spark.sql.execution.id"
+  private[spark] val DATASET_QUERY_EXECUTION_ID_KEY =
+    "spark.sql.dataset.queryExecution.id"
 
   /**
    * Executor id for the driver.  In earlier versions of Spark, this was `<driver>`, but this was
diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala
index 8cd95ee653ebe..7c704c3d2b37e 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala
@@ -19,6 +19,7 @@ package org.apache.spark.api.r
 
 import java.io.{ByteArrayOutputStream, DataOutputStream}
 import java.nio.charset.StandardCharsets.UTF_8
+import java.security.MessageDigest
 
 import io.netty.channel.{Channel, ChannelHandlerContext, SimpleChannelInboundHandler}
 
@@ -34,7 +35,8 @@ private class RBackendAuthHandler(secret: String)
     // The R code adds a null terminator to serialized strings, so ignore it here.
     val clientSecret = new String(msg, 0, msg.length - 1, UTF_8)
     try {
-      require(secret == clientSecret, "Auth secret mismatch.")
+      require(MessageDigest.isEqual(secret.getBytes(UTF_8), clientSecret.getBytes(UTF_8)),
+        "Auth secret mismatch.")
       ctx.pipeline().remove(this)
       writeReply("ok", ctx.channel())
     } catch {
diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
index 04302c77a3984..2a3fd0d004e11 100644
--- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
@@ -20,10 +20,12 @@ package org.apache.spark.deploy
 import org.json4s.JsonAST._
 import org.json4s.JsonDSL._
 
+import org.apache.spark.SparkConf
 import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse}
 import org.apache.spark.deploy.master._
 import org.apache.spark.deploy.worker.ExecutorRunner
 import org.apache.spark.resource.{ResourceInformation, ResourceRequirement}
+import org.apache.spark.util.Utils
 
 private[deploy] object JsonProtocol {
 
@@ -123,10 +125,16 @@ private[deploy] object JsonProtocol {
    *         `memoryperexecutor` minimal memory in MB required to each executor
    *         `resourcesperexecutor` minimal resources required to each executor
    *         `user` name of the user who submitted the application
-   *         `command` the command string used to submit the application
+   *         `command` the command string used to submit the application, with secret-bearing
+   *         fields (`environment`, `javaOpts`) redacted using `spark.redaction.regex`
    * For compatibility also returns the deprecated `memoryperslave` & `resourcesperslave` fields.
    */
-  def writeApplicationDescription(obj: ApplicationDescription): JObject = {
+  def writeApplicationDescription(obj: ApplicationDescription, conf: SparkConf): JObject = {
+    val redactedEnvironment = Utils.redact(conf, obj.command.environment.toSeq).toMap
+    val redactedJavaOpts = Utils.redactCommandLineArgs(conf, obj.command.javaOpts)
+    val redactedCommand = obj.command.copy(
+      environment = redactedEnvironment,
+      javaOpts = redactedJavaOpts)
     ("name" -> obj.name) ~
     ("cores" -> obj.maxCores.getOrElse(0)) ~
     ("memoryperexecutor" -> obj.memoryPerExecutorMB) ~
@@ -134,7 +142,7 @@ private[deploy] object JsonProtocol {
     ("memoryperslave" -> obj.memoryPerExecutorMB) ~
     ("resourcesperslave" -> obj.resourceReqsPerExecutor.toList.map(writeResourceRequirement)) ~
     ("user" -> obj.user) ~
-    ("command" -> obj.command.toString)
+    ("command" -> redactedCommand.toString)
   }
 
   /**
@@ -154,7 +162,7 @@ private[deploy] object JsonProtocol {
     ("memory" -> obj.memory) ~
     ("resources" -> writeResourcesInfo(obj.resources)) ~
     ("appid" -> obj.appId) ~
-    ("appdesc" -> writeApplicationDescription(obj.appDesc))
+    ("appdesc" -> writeApplicationDescription(obj.appDesc, obj.conf))
   }
 
   /**
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index 8f0b684a93e81..5bfb486b0e606 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -54,7 +54,7 @@ private[deploy] class ExecutorRunner(
     val sparkHome: File,
     val executorDir: File,
     val workerUrl: String,
-    conf: SparkConf,
+    val conf: SparkConf,
     val appLocalDirs: Seq[String],
     @volatile var state: ExecutorState.Value,
     val rpId: Int,
diff --git a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala
index 98b80317db982..8ecb14be1dfb8 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala
@@ -39,6 +39,13 @@ private[spark] object Tests {
     .booleanConf
     .createOptional
 
+  val INJECT_SHUFFLE_FETCH_FAILURES =
+    ConfigBuilder("spark.testing.injectShuffleFetchFailures")
+      .doc("Injecting fetch failures for shuffle stages by providing an invalid BlockManager " +
+        "location for the first stage attempt. Testing only flag!")
+      .booleanConf
+      .createWithDefault(false)
+
   val TEST_NO_STAGE_RETRY = ConfigBuilder("spark.test.noStageRetry")
     .version("1.2.0")
     .booleanConf
diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
index d7bda5bbe721a..3c045fcd95411 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
@@ -99,7 +99,7 @@ class HadoopMapReduceCommitProtocol(
    * e.g. a=1/b=2. Files under these partitions will be saved into staging directory and moved to
    * destination directory at the end, if `dynamicPartitionOverwrite` is true.
    */
-  @transient private var partitionPaths: mutable.Set[String] = null
+  @transient protected var partitionPaths: mutable.Set[String] = null
 
   /**
    * The staging directory of this write job. Spark uses it to deal with files with absolute output
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala
index 49c259999a471..675c44153cd4d 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala
@@ -130,6 +130,22 @@ private[spark] object RDDOperationScope extends Logging {
       name: String,
       allowNesting: Boolean,
       ignoreParent: Boolean)(body: => T): T = {
+    withScope(sc, name, allowNesting, ignoreParent,
+      nextScopeId().toString)(body)
+  }
+
+  /**
+   * Execute the given body such that all RDDs created in this body
+   * will have the same scope, with an explicit scope ID.
+   *
+   * Note: Return statements are NOT allowed in body.
+   */
+  private[spark] def withScope[T](
+      sc: SparkContext,
+      name: String,
+      allowNesting: Boolean,
+      ignoreParent: Boolean,
+      rddScopeId: String)(body: => T): T = {
     // Save the old scope to restore it later
     val scopeKey = SparkContext.RDD_SCOPE_KEY
     val noOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY
@@ -139,10 +155,12 @@ private[spark] object RDDOperationScope extends Logging {
     try {
       if (ignoreParent) {
         // Ignore all parent settings and scopes and start afresh with our own root scope
-        sc.setLocalProperty(scopeKey, new RDDOperationScope(name).toJson)
+        sc.setLocalProperty(scopeKey,
+          new RDDOperationScope(name, None, rddScopeId).toJson)
       } else if (sc.getLocalProperty(noOverrideKey) == null) {
         // Otherwise, set the scope only if the higher level caller allows us to do so
-        sc.setLocalProperty(scopeKey, new RDDOperationScope(name, oldScope).toJson)
+        sc.setLocalProperty(scopeKey,
+          new RDDOperationScope(name, oldScope, rddScopeId).toJson)
       }
       // Optionally disallow the child body to override our scope
       if (!allowNesting) {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 5fbd160bc683b..22720b98aafde 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -325,6 +325,16 @@ private[spark] class DAGScheduler(
   private val messageScheduler =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("dag-scheduler-message")
 
+  private def scheduleResubmit(): Unit = {
+    messageScheduler.schedule(
+      new Runnable {
+        override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages)
+      },
+      DAGScheduler.RESUBMIT_TIMEOUT,
+      TimeUnit.MILLISECONDS
+    )
+  }
+
   private[spark] var eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
   // Used for test only. Some tests uses the same thread of the event poster to
   // process the events to ensure the deterministic behavior during the test.
@@ -1858,6 +1868,11 @@ private[spark] class DAGScheduler(
             throw SparkCoreErrors.accessNonExistentAccumulatorError(id)
         }
         acc.merge(updates.asInstanceOf[AccumulatorV2[Any, Any]])
+        if (acc.isInstanceOf[LastAttemptAccumulator[_, _, _]]) {
+          acc.asInstanceOf[LastAttemptAccumulator[_, _, _]].mergeLastAttempt(
+            updates, stage.rdd, event.taskInfo,
+            task.stageId, task.stageAttemptId, task.localProperties)
+        }
         // To avoid UI cruft, ignore cases where value wasn't updated
         if (acc.name.isDefined && !updates.isZero) {
           stage.latestInfo.accumulables(id) = acc.toInfo(None, Some(acc.value))
@@ -2174,13 +2189,7 @@ private[spark] class DAGScheduler(
       if (noResubmitEnqueued) {
         logInfo(log"Resubmitting ${MDC(FAILED_STAGE, stage)} " +
           log"(${MDC(FAILED_STAGE_NAME, stage.name)}) due to rollback.")
-        messageScheduler.schedule(
-          new Runnable {
-            override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages)
-          },
-          DAGScheduler.RESUBMIT_TIMEOUT,
-          TimeUnit.MILLISECONDS
-        )
+        scheduleResubmit()
       }
     }
 
@@ -2333,6 +2342,19 @@ private[spark] class DAGScheduler(
                 // The epoch of the task is acceptable (i.e., the task was launched after the most
                 // recent failure we're aware of for the executor), so mark the task's output as
                 // available.
+                // For testing purposes, inject fetch failures controlled from the driver-side by
+                // supplying an invalid location.
+                if (Utils.isTesting &&
+                    sc.conf.get(config.Tests.INJECT_SHUFFLE_FETCH_FAILURES) &&
+                    task.stageAttemptId == 0) {
+                  val currentLocation = status.location
+                  val invalidLocation = BlockManagerId(
+                    execId = BlockManagerId.INVALID_EXECUTOR_ID,
+                    host = currentLocation.host,
+                    port = currentLocation.port,
+                    topologyInfo = currentLocation.topologyInfo)
+                  status.updateLocation(invalidLocation)
+                }
                 val isChecksumMismatched = mapOutputTracker.registerMapOutput(
                   shuffleStage.shuffleDep.shuffleId, smt.partitionId, status)
                 if (isChecksumMismatched) {
@@ -2492,13 +2514,7 @@ private[spark] class DAGScheduler(
                 log"Resubmitting ${MDC(STAGE, mapStage)} " +
                 log"(${MDC(STAGE_NAME, mapStage.name)}) and ${MDC(FAILED_STAGE, failedStage)} " +
                 log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) due to fetch failure")
-              messageScheduler.schedule(
-                new Runnable {
-                  override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages)
-                },
-                DAGScheduler.RESUBMIT_TIMEOUT,
-                TimeUnit.MILLISECONDS
-              )
+              scheduleResubmit()
             }
           }
 
@@ -2605,9 +2621,7 @@ private[spark] class DAGScheduler(
             if (noResubmitEnqueued) {
               logInfo(log"Resubmitting ${MDC(FAILED_STAGE, failedStage)} " +
                 log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) due to barrier stage failure.")
-              messageScheduler.schedule(new Runnable {
-                override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages)
-              }, DAGScheduler.RESUBMIT_TIMEOUT, TimeUnit.MILLISECONDS)
+              scheduleResubmit()
             }
           }
         }
diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala
index ecebb97ecfc1d..d2a81e56265c9 100644
--- a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala
+++ b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala
@@ -21,6 +21,7 @@ import java.io.{DataInputStream, DataOutputStream}
 import java.net.Socket
 import java.nio.channels.SocketChannel
 import java.nio.charset.StandardCharsets.UTF_8
+import java.security.MessageDigest
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.config.Python.{PYTHON_UNIX_DOMAIN_SOCKET_DIR, PYTHON_UNIX_DOMAIN_SOCKET_ENABLED}
@@ -65,7 +66,7 @@ private[spark] class SocketAuthHelper(val conf: SparkConf) {
       try {
         s.setSoTimeout(10000)
         val clientSecret = readUtf8(s)
-        if (secret == clientSecret) {
+        if (MessageDigest.isEqual(secret.getBytes(UTF_8), clientSecret.getBytes(UTF_8))) {
           writeUtf8("ok", s)
           shouldClose = false
         } else {
diff --git a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala
index 847d3ffff630a..78e137f53355c 100644
--- a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala
+++ b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala
@@ -218,7 +218,7 @@ private[spark] trait PagedTable[T] {
       }
     }
 
-    <div class="d-flex justify-content-between align-items-center">
+    <div class="d-flex justify-content-between align-items-center mb-3">
       <div class="d-flex align-items-center">
         <span class="pe-1">Page: </span>
         <ul class="pagination mb-0">
diff --git a/core/src/main/scala/org/apache/spark/util/LastAttemptAccumulator.scala b/core/src/main/scala/org/apache/spark/util/LastAttemptAccumulator.scala
new file mode 100644
index 0000000000000..a262750a8cd33
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/util/LastAttemptAccumulator.scala
@@ -0,0 +1,984 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.util
+
+import scala.math.Ordering.Implicits._
+import scala.reflect.ClassTag
+import scala.util.control.NonFatal
+
+import org.apache.spark.SparkContext
+import org.apache.spark.internal.{LogEntry, Logging, LogKey, LogKeys}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.scheduler.TaskInfo
+
+/*
+ *  Last Attempt Accumulators are Accumulators that track the value of a metric aggregated across
+ *  the "last execution" that produced the values. "Last execution" can be defined as:
+ *  - For RDDs: the last execution of a given RDD partition, in the latest Stage and Stage attempt
+ *    that recomputed it.
+ *  - Across RDDs: lastAttemptValueForRDDId, lastAttemptValueForRDDIds, lastAttemptValueForAllRDDs,
+ *    lastAttemptValueForHighestRDDId let specify that only values from specific RDDs should be
+ *    aggregated.
+ *  - For Spark SQL Execution: In SQLLastAttemptAccumulator, lastAttemptValueForDataset,
+ *    lastAttemptValueForQueryExecution let specify that only values from the last SQL execution of
+ *    a specific Dataset (or QueryExecution) should be aggregated.
+ *
+ *  In specific situations the last attempt value cannot be computed. This is both because of known
+ *  specific user actions (e.g. mixing driver updates with task updates), and because the
+ *  accumulator performs (and logs) various internal sanity checks and bails out if it detects an
+ *  unexpected situation. Therefore, all the lastAttempt methods return an Option[OUT], where None
+ *  means that it has bailed out.
+ *
+ *  Updates to the accumulator from completed Tasks are merged in mergeLastAttempt, called from
+ *  DAGScheduler.updateAccumulators, called from DAGScheduler.handleTaskCompletion in the single
+ *  threaded DAGScheduler event loop. Therefore, we don't need to worry about concurrency control
+ *  when updating the accumulator values. However, reading of the last attempt value can potentially
+ *  be done concurrently, so we use synchronization. When there is normally no contention, JVM
+ *  synchronization should be very low overhead.
+ *
+ *  In order to be able to provide last attempt value, we need to keep track of partial metric
+ *  values, so that after a partial re-attempt the partial value can be updated, and then
+ *  re-aggregated.
+ *  There are various sources of re-attempts that we have to track:
+ *
+ *  1. Spark Core.
+ *  ==============
+ *    - Updates from failed tasks are filtered in Task.collectAccumulatorUpdates before they are
+ *      even passed back to the driver. We don't need to worry about them here.
+ *    - We should not get results from two successful attempts of a Task in the same Stage attempt.
+ *      TaskSetManager.handleSuccessfulTask ensures that.
+ *    - Therefore we only need to track Stage retries. The Last Attempt Metric will aggregate the
+ *      metric value of a given RDD partition from the last attempt of the Stage with the highest
+ *      stageId.
+ *      Normally recomputation creates a new stageAttemptId in the same Stage, but there can also
+ *      be multiple new Stages due to:
+ *      - In AQE, a materialized QueryStage is submitted as a new Stage, which would normally get
+ *        skipped, as it is already materialized. However, if results of that stage have been lost,
+ *        the recomputation will happen in that Stage.
+ *      - If the same Dataset with the same QueryExecution and same executedPlan is reused for
+ *        another execution (e.g. again calling collect()). All map stages should be materialized,
+ *        so like with AQE, they should be skipped, unless the results have been lost. Then,
+ *        recomputation will happen in that Stage. The result stage computing the action will be
+ *        fully re-executed.
+ *    - Due to the async nature of cancellation, there can be tasks from previous attempts that
+ *      arrive later than the last attempt. Therefore, we need to track and compare stageId and
+ *      stageAttemptId of every computed RDD partition, in order to discard latecomers.
+ *
+ *  2. Spark SQL.
+ *  =============
+ *  LastAttemptAccumulator offers simple tracking of the last SQL execution, by assuming that
+ *  the last execution will be in the scope of an RDD with the highest id, and using
+ *  [[lastAttemptValueForHighestRDDId]]. See SQLLastAttemptAccumulator for more possibilities
+ *  of tracking SQL execution.
+ *
+ *  Simple last SQL execution tracking
+ *  ----------------------------------
+ *  Whenever an AQE replan happens, or a repeated execution is submitted, there will be a new
+ *  RDD created for that execution. If AQE creates a new plan, it always uses it and cancels
+ *  the previous one. So, aggregating the metric updates from the RDD with the highest id
+ *  should correspond to the last execution and the latest AQE plan.
+ *  This has some limitations, e.g. doesn't work if the same metric is used in multiple places
+ *  in the query plan, and we want all occurrences to be aggregated together.
+ *  It also wouldn't work if a SparkPlan splits its execution into multiple RDDs. This for example
+ *  happens in BroadcastNestedLoopJoinExec with matchedStreamRows and notMatchedBroadcastRows.
+ *  One can use this simple last attempt tracking by using lastAttemptLastRDDValue.
+ *
+ *  3. Driver only updates.
+ *  =======================
+ *  Sometimes the metric is manipulated directly from the driver, not from within a Task.
+ *  It can be either explicit by user code, or implicit by Catalyst Optimizer, for example
+ *  ConvertToLocalRelation rule, folding a piece of the plan by evaluating it manually on the
+ *  driver.
+ *  When this happens, LastAttemptAccumulator has no information to reason about what was the
+ *  last execution. If the only metric updates are coming from the driver, it assumes that these are
+ *  the "last attempt". If there are both updates from executors and from the driver, it bails out.
+ *
+ *  Implementation
+ *  ==============
+ *  To track the last attempts, we track a map of metric values per RDD id:
+ *  - Map[RddId, LastAttemptRDDVals[PARTIAL]]
+ *
+ *  In LastAttemptRDDVals we track an Array of per RDD partition partial merge values, together with
+ *  the stageId and stageAttemptId and taskAttemptNumber to record task execution.
+ *  We also track the RDD id, RDDScope id and last SQL execution id updating that RDD.
+ *
+ * Normally to merge partial values, two full Accumulators are used. However, accumulator classes
+ * that support Last Attempt have to implement partialMerge which merges PARTIAL type.
+ * This is used to have more compact representation, as PARTIAL can be e.g. a primitive type as
+ * opposed to a full AccumulatorV2 object instance.
+ */
+
+
+private class LastAttemptRDDVals[@specialized T](
+    val rddId: Int,
+    val rddScopeId: Option[String],
+    // Array of partial metric values, indexed by RDD partition id.
+    // Metric updates to a given RDD partition can come from different stageAttempts if a retry
+    // happens while a Job with the Stage is running (a downstream Stage within a Job detects
+    // missing blocks and triggers recompute), or from different Stages, if a retry happens later
+    // (a new Job is submitted that depends on data from the RDD, if it finds it's missing it will
+    // recompute it in a new Stage).
+    // If a missing output is detected in a Stage while the stage is still running (e.g. executor
+    // is lost or decommissioned while the stage is running, and loses the output of some already
+    // finished tasks), a new Task with new taskAttemptNumber will be started for that Task.
+    // There may be multiple Tasks with different taskAttemptNumbers running in parallel due to
+    // speculation, but DAGScheduler guarantees that only one of them will reach metrics reporting,
+    // so it doesn't have to be dealt with here.
+    //
+    // There may be partitions that are either not computed at all (for example, due to early stop
+    // in take/limit), or AQE task coalescing may be visible as an update of the partition id of
+    // the first partition of the coalesced range. AQE guarantees that if these are retried, they
+    // will be coalesced in the same ranges, so update the same values.
+    // Whether a partition has been computed is tracked by [[computedBitmap]] below; the value at
+    // its slot in [[partitionPartialVals]] is undefined (typically the zero of T) for uncomputed
+    // partitions.
+    //
+    // Arrays of primitive types are more memory efficient than an array of objects due to
+    // references, object headers and paddings overheads.
+    // The `@specialized` annotation should make scala specialize it to use primitive array instead
+    // of boxed objects.
+    val partitionPartialVals: Array[T])
+  {
+
+  import LastAttemptRDDVals.EMPTY_ID
+
+  // In a case of repeated execution of the same QueryExecution and reuse of the SparkPlan
+  // (for example multiple `collect()` on the same Dataset), a new RDD may be executed in the same
+  // RDDOperationScope for the new execution. Hence, we can have multiple RDDs with the same
+  // RDDOperationScope, coming from different SQL executions and we should only count the last one.
+  // However, it may also be an old RDD that is reused in the new execution, but needs to be
+  // partially recomputed because part of it is missing. In that case, the last attempt value needs
+  // to still be aggregated over the whole RDD, because the whole RDD is used in the new execution.
+  // Note that this only applies per RDDOperationScope/SparkPlan, because other plans in the same
+  // new execution may have reused their RDD in whole, and hence have the last SQL executionId
+  // come from an earlier execution.
+  // Note: This doesn't work in case a user concurrently executed multiple actions on the same
+  // Dataset, resulting in multiple concurrent executions trying to compute the same RDD. This
+  // however should not happen in practice and would likely produce other unexpected effects.
+  var lastSqlExecutionId: Option[Long] = None
+
+  // Common (stageId, stageAttemptId, taskAttemptNumber) shared by the majority of computed
+  // partitions. In the common case (no stage retries), every computed partition has the same
+  // attempt tuple, so we store it once at the RDD level instead of allocating three N-sized int
+  // arrays. The values are set on the first update and never changed; partitions whose attempt
+  // differs are recorded in the override arrays below.
+  // EMPTY_ID until the first update.
+  private var commonStageId: Int = EMPTY_ID
+  private var commonStageAttemptId: Int = EMPTY_ID
+  private var commonTaskAttemptNumber: Int = EMPTY_ID
+
+  // Bitmap of partitions that have been computed, one bit per partition packed into longs.
+  // A bit is set when a partition receives its first update; a partition with a clear bit has not
+  // been computed (e.g. early stop in take/limit, AQE task coalescing).
+  // Reads of an individual long are atomic on 64-bit JVMs, matching the loose concurrency
+  // semantics of the original per-partition int arrays.
+  private val computedBitmap: Array[Long] =
+    new Array[Long]((partitionPartialVals.length + 63) >>> 6)
+
+  // Per-partition override arrays for each component of the attempt tuple. Each is allocated
+  // lazily and independently the first time some partition's value for that component diverges
+  // from the common value; until then the field is null and no per-partition state is kept for
+  // that component. Once allocated, an array is sized [[numPartitions]]: entries equal to
+  // EMPTY_ID mean "match the common value" and any other value is the per-partition override.
+  // This way:
+  //  - RDDs without retries pay zero per-partition allocations (all three fields stay null).
+  //  - A pure stage retry (new stageAttemptId, same stageId, taskAttemptNumber resets to 0)
+  //    allocates only [[overrideStageAttemptIds]].
+  //  - A mid-stage retry (executor lost, some tasks restart with a higher taskAttemptNumber)
+  //    allocates only [[overrideTaskAttemptNumbers]].
+  //  - Whole-stage cross-Stage retry (new stageId) allocates [[overrideStageIds]] too.
+  //
+  // Concurrency: update() is called only from the DAGScheduler scheduler loop. Some readers of
+  // the state can run concurrently (e.g. logAccumulatorState formatting). The fields are
+  // declared @volatile, and the new array is fully populated before the field is assigned, so a
+  // reader either sees null (use common) or sees an array whose Array.fill initialization is
+  // visible. In-place element writes for subsequent overrides are plain ints; readers may see
+  // them eventually, matching the loose semantics of the original per-partition int arrays.
+  @volatile private var overrideStageIds: Array[Int] = null
+  @volatile private var overrideStageAttemptIds: Array[Int] = null
+  @volatile private var overrideTaskAttemptNumbers: Array[Int] = null
+
+  def numPartitions: Int = partitionPartialVals.length
+
+  def isEmptyAt(partitionId: Int): Boolean = {
+    val word = computedBitmap(partitionId >>> 6)
+    ((word >>> (partitionId & 63)) & 1L) == 0L
+  }
+
+  private def setComputedBit(partitionId: Int): Unit = {
+    val idx = partitionId >>> 6
+    computedBitmap(idx) = computedBitmap(idx) | (1L << (partitionId & 63))
+  }
+
+  /**
+   * Records a new value for one component (stageId / stageAttemptId / taskAttemptNumber) of the
+   * attempt tuple at `partitionId`, allocating the override array on first divergence. Returns
+   * the array reference the caller should write back to the @volatile field - either a freshly
+   * allocated and populated array (first override for this component) or the existing array
+   * after an in-place update. Once the array exists, the value is always written, even when it
+   * matches the common value - lookupComponent returns it correctly either way.
+   */
+  private def setOverrideComponent(
+      array: Array[Int],
+      partitionId: Int,
+      value: Int,
+      common: Int): Array[Int] = {
+    if (array == null) {
+      if (value == common) null
+      else {
+        val newArr = Array.fill(partitionPartialVals.length)(EMPTY_ID)
+        newArr(partitionId) = value
+        newArr
+      }
+    } else {
+      array(partitionId) = value
+      array
+    }
+  }
+
+  /** Reads one component's value at `partitionId`, falling back to `common` when the override
+   *  array is null or the entry is still EMPTY_ID (the slot was either not yet written, or was
+   *  initialized to EMPTY_ID and never overwritten because the partition's value matched the
+   *  common when the array was first allocated for a different partition). */
+  private def lookupComponent(array: Array[Int], partitionId: Int, common: Int): Int = {
+    if (array == null) common
+    else {
+      val v = array(partitionId)
+      if (v == EMPTY_ID) common else v
+    }
+  }
+
+  def update(partialValue: AccumulatorPartialVal[T]): Unit = {
+    val partId = partialValue.rddPartitionId
+    if (commonStageId == EMPTY_ID) {
+      commonStageId = partialValue.stageId
+      commonStageAttemptId = partialValue.stageAttemptId
+      commonTaskAttemptNumber = partialValue.taskAttemptNumber
+    }
+    partitionPartialVals(partId) = partialValue.partialMergeVal
+    setComputedBit(partId)
+    overrideStageIds = setOverrideComponent(
+      overrideStageIds, partId, partialValue.stageId, commonStageId)
+    overrideStageAttemptIds = setOverrideComponent(
+      overrideStageAttemptIds, partId, partialValue.stageAttemptId, commonStageAttemptId)
+    overrideTaskAttemptNumbers = setOverrideComponent(
+      overrideTaskAttemptNumbers, partId, partialValue.taskAttemptNumber, commonTaskAttemptNumber)
+    lastSqlExecutionId = partialValue.sqlExecutionId
+  }
+
+  def partialValueAt(partId: Int): AccumulatorPartialVal[T] = {
+    var sId = EMPTY_ID
+    var saId = EMPTY_ID
+    var tan = EMPTY_ID
+    if (!isEmptyAt(partId)) {
+      sId = lookupComponent(overrideStageIds, partId, commonStageId)
+      saId = lookupComponent(overrideStageAttemptIds, partId, commonStageAttemptId)
+      tan = lookupComponent(overrideTaskAttemptNumbers, partId, commonTaskAttemptNumber)
+    }
+    AccumulatorPartialVal(
+      partialMergeVal = partitionPartialVals(partId),
+      rddId = rddId,
+      rddPartitionId = partId,
+      rddNumPartitions = partitionPartialVals.length,
+      rddScopeId = rddScopeId,
+      stageId = sId,
+      stageAttemptId = saId,
+      taskAttemptNumber = tan,
+      sqlExecutionId = lastSqlExecutionId)
+  }
+
+  override def toString: String = {
+    val n = numPartitions
+    val partVals = new StringBuilder("[")
+    val sIds = new StringBuilder("[")
+    val saIds = new StringBuilder("[")
+    val tans = new StringBuilder("[")
+    var i = 0
+    while (i < n) {
+      if (i > 0) {
+        partVals.append(',')
+        sIds.append(',')
+        saIds.append(',')
+        tans.append(',')
+      }
+      partVals.append(partitionPartialVals(i))
+      val pv = partialValueAt(i)
+      sIds.append(pv.stageId)
+      saIds.append(pv.stageAttemptId)
+      tans.append(pv.taskAttemptNumber)
+      i += 1
+    }
+    partVals.append(']')
+    sIds.append(']')
+    saIds.append(']')
+    tans.append(']')
+    s"""LastAttemptVal(
+       |  rddId=$rddId,
+       |  rddScopeId=$rddScopeId,
+       |  lastSqlExecutionId=$lastSqlExecutionId,
+       |  partitionPartialVals=$partVals,
+       |  stageIds=$sIds,
+       |  stageAttemptIds=$saIds,
+       |  taskAttemptNumbers=$tans
+       |)""".stripMargin
+  }
+}
+
+private object LastAttemptRDDVals {
+  // EMPTY_ID indicates "no attempt recorded": used as the initial value of the common
+  // (stageId, stageAttemptId, taskAttemptNumber) before any update, and as the value returned
+  // by partialValueAt for partitions that have not been computed.
+  val EMPTY_ID: Int = -1
+
+  def apply[@specialized T](
+      rddId: Int,
+      rddScopeId: Option[String],
+      numPartitions: Int)(implicit ct: ClassTag[T]): LastAttemptRDDVals[T] = {
+    new LastAttemptRDDVals[T](rddId, rddScopeId, new Array[T](numPartitions))
+  }
+
+  def createFromFirstUpdate[@specialized T](
+      update: AccumulatorPartialVal[T])(implicit ct: ClassTag[T]): LastAttemptRDDVals[T] = {
+    val newVal = LastAttemptRDDVals[T](
+      rddId = update.rddId,
+      rddScopeId = update.rddScopeId,
+      update.rddNumPartitions)
+    newVal.update(update)
+    newVal
+  }
+}
+
+private class LastAttemptMap[K, V] {
+  // Map used to keep metric updates, keyed by RDD id or RDD scope id, backed by a List.
+  // In the majority of cases (when there are no stage retries and no AQE replanning
+  // cancelling already running stages), there will be only one key, so a list backed map
+  // should have less overhead.
+  //
+  // Accumulators are modified only from DAGScheduler.updateAccumulators -> mergeLastAttempt,
+  // which is running from a single thread (scheduling loop), so no concurrency control is needed
+  // for updates. Read accesses to an immutable list should use a consistent state without extra
+  // synchronization.
+
+  @volatile private var map: List[(K, V)] = Nil
+
+  def contains(key: K): Boolean = map.exists(_._1 == key)
+
+  def get(key: K): Option[V] = map.collectFirst { case (k, v) if k == key => v }
+
+  def put(key: K, value: V): Unit = synchronized {
+    map = (key, value) :: map.filterNot(_._1 == key)
+  }
+
+  def keys: Iterable[K] = map.map(_._1)
+  def values: Iterable[V] = map.map(_._2)
+  def isEmpty: Boolean = map.isEmpty
+  def nonEmpty: Boolean = map.nonEmpty
+  def clear(): Unit = synchronized { map = Nil }
+
+  override def toString: String = map
+    .map(elem => s"${elem._1} -> ${elem._2}").mkString("LastAttemptMap {\n", ",\n", "\n}")
+}
+
+private case class AccumulatorPartialVal[PARTIAL](
+    partialMergeVal: PARTIAL,
+    rddId: Int,
+    rddPartitionId: Int,
+    rddNumPartitions: Int,
+    rddScopeId: Option[String],
+    stageId: Int,
+    stageAttemptId: Int,
+    taskAttemptNumber: Int,
+    sqlExecutionId: Option[Long]
+) {
+  override def toString: String = {
+    s"""AccumulatorPartialVal(
+       |  partialMergeVal=$partialMergeVal,
+       |  rddId=$rddId,
+       |  rddPartitionId=$rddPartitionId,
+       |  rddNumPartitions=$rddNumPartitions,
+       |  rddScopeId=$rddScopeId,
+       |  stageId=$stageId,
+       |  stageAttemptId=$stageAttemptId,
+       |  taskAttemptNumber=$taskAttemptNumber,
+       |  sqlExecutionId=$sqlExecutionId
+       |)""".stripMargin
+  }
+
+  /** Tuple of stage id, stage attempt id and taskAttemptNumber, defining the order of attempts. */
+  val attempt: (Int, Int, Int) = (stageId, stageAttemptId, taskAttemptNumber)
+}
+
+/**
+ * A trait that can be mixed into a subclass of [[AccumulatorV2]] to track the "logical"
+ * value of the "last attempt" of the execution using the accumulator - aggregated from the last
+ * attempts of any Task that calculated some RDD partitions and used this accumulator, and
+ * discarding any values coming from earlier attempts that have been recomputed.
+ * If the accumulator is used by multiple RDDs, the last attempt value is tracked separately for
+ * each, and can be retrieved for each or all of them separately, see lastAttemptValueForX methods.
+ * If the accumulator is used directly on the Spark Driver using [[AccumulatorV2#add]],
+ * that value is considered the last attempt value.
+ * If the accumulator was both used in Tasks and updated directly on the driver, it can't determine
+ * what should be considered the last attempt, and lastAttemptValueForX methods will return None.
+ *
+ * Contract for driver-only updates:
+ * A driver-side value (set via [[AccumulatorV2#add]] on the driver, outside any Task) is only
+ * returned by methods that do not narrow by RDD, namely [[lastAttemptValueForAllRDDs]] and
+ * [[lastAttemptValueForHighestRDDId]]. Methods that narrow to specific RDDs or RDD scopes
+ * ([[lastAttemptValueForRDDId]], [[lastAttemptValueForRDDIds]], [[lastAttemptValueForRDDScopes]])
+ * return the zero value when a driver-only value is present, because a driver-side update cannot
+ * be attributed to any particular RDD or scope.
+ *
+ * [[LastAttemptAccumulator]] is not reset by the [[AccumulatorV2#reset]] method implementation,
+ * and its state is not copied by the [[AccumulatorV2#copy]] method implementation, and it should
+ * not be serialized to the Executors. The internal state should only be initialized by the
+ * [[initializeLastAttemptAccumulator]] method on the "main" instance of the accumulator, that was
+ * created and registered with [[AccumulatorContext]] with [[AccumulatorV2#register]]. All the
+ * interfaces of [[LastAttemptAccumulator]]: [[mergeLastAttempt]] (used only by DAGScheduler) and
+ * lastAttemptValueForX, [[logAccumulatorState]] (used by the using code) should only be invoked on
+ * that instance, on the Spark Driver.
+ *
+ * The [[LastAttemptAccumulator]] is not thread-safe. [[mergeLastAttempt]] should only be used by
+ * DAGScheduler, by the scheduler thread. Retrieving the value using lastAttemptValueForXXX while
+ * it is concurrently updated (execution is running) can produce some inconsistencies, but should
+ * not crash.
+ * If an RDD using the [[LastAttemptAccumulator]] is used concurrently by multiple actions that
+ * all try to recompute it, it may produce unexpected results and the semantics of what is "last
+ * attempt" becomes ambiguous. This should not be done in practice, and will likely result in more
+ * unexpected behaviours in Spark.
+ *
+ * Implementations must implement [[partialMergeVal]] and [[partialMerge]] methods operating on
+ * PARTIAL type. In regular [[AccumulatorV2]] implementations, the [[AccumulatorV2]] object
+ * itself holds the intermediate value of the accumulator, and [[AccumulatorV2#merge]] method is
+ * used to merge these objects together. [[LastAttemptAccumulator]] needs to keep track of partial
+ * values of every partition of every RDD that used the accumulator, and holding a full
+ * [[AccumulatorV2]] object for each would have a high overhead. Therefore, an implementation should
+ * be able to return PARTIAL value from [[partialMergeVal]] that represents an intermediate
+ * mergeable value, and a [[partialMerge]] method that can merge that value into the accumulator.
+ * Implementations must also implement an [[isMergeable]] method that checks if the other
+ * [[AccumulatorV2]] is of a compatible type to be merged with this using [[partialMergeVal]]. In
+ * regular [[AccumulatorV2]] implementations, this check is normally done inside the
+ * [[AccumulatorV2#merge]] method, which is not used here.
+ *
+ * If an implementation is used to keep user data in the accumulator, it should override
+ * [[accumulatorStoresUserData]] to return true, to ensure correct structured logging annotation.
+ * Otherwise it should override it to false.
+ */
+trait LastAttemptAccumulator[IN, OUT, PARTIAL] extends Logging {
+  this: AccumulatorV2[IN, OUT] =>
+
+  // For every RDD that participated in the computation of this accumulator, keep the partial
+  // value of the accumulator for the latest stage and stage attempt that computed it.
+  // Keyed by rdd.id.
+  // Only kept and accessed on the driver, in the instance of the LastAttemptAccumulator that was
+  // created and registered with AccumulatorContext with AccumulatorV2.register().
+  // Should not be copied / reset by the implementation of copy() / reset() functions.
+  // Transient: only needed on the driver and doesn't need to be serialized.
+  @transient
+  private var lastAttemptRddsMap: LastAttemptMap[Int, LastAttemptRDDVals[PARTIAL]] = _
+
+  // ClassTag for PARTIAL, captured at initialization time.
+  @transient private var partialClassTag: ClassTag[PARTIAL] = _
+
+  // Metric value set directly on the driver, not from within a task.
+  // Only kept and accessed on the driver, in the instance of the LastAttemptAccumulator that was
+  // created and registered with AccumulatorContext with AccumulatorV2.register().
+  // Should not be copied / reset by the implementation of copy() / reset() functions.
+  // Transient: only needed on the driver and doesn't need to be serialized.
+  @transient
+  private var lastAttemptDirectDriverValue: Option[OUT] = _
+
+  // Flipped to true if unexpected metrics updates are received and we can no longer reason
+  // about the last attempt.
+  // Should not be copied / reset by the implementation of copy() / reset() functions.
+  // Transient: only needed on the driver and doesn't need to be serialized.
+  @transient
+  protected var lastAttemptAccumulatorInvalid: Boolean = false
+
+  // Indicates that the LastAttemptAccumulator has been initialized.
+  // It is initialized in assertValid().
+  // Should not be copied / reset by the implementation of copy() / reset() functions.
+  // Transient: only needed on the driver and doesn't need to be serialized.
+  @transient
+  protected var lastAttemptAccumulatorInitialized: Boolean = false
+
+  /** Reset the state of the last attempt accumulator, discarding all the past attempts, and
+   *  making it valid again if it was invalidated. */
+  def resetLastAttemptAccumulator(): Unit = try {
+    lastAttemptRddsMap.clear()
+    lastAttemptDirectDriverValue = None
+    lastAttemptAccumulatorInvalid = false
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in resetLastAttemptAccumulator",
+        exception = Some(e))
+  }
+
+  def initializeLastAttemptAccumulator()(implicit ct: ClassTag[PARTIAL]): Unit = try {
+    assert(isAtDriverSide)
+    assert(!lastAttemptAccumulatorInitialized)
+    assert(!lastAttemptAccumulatorInvalid)
+    assert(lastAttemptRddsMap == null)
+    assert(lastAttemptDirectDriverValue == null)
+    partialClassTag = ct
+    lastAttemptRddsMap = new LastAttemptMap[Int, LastAttemptRDDVals[PARTIAL]]
+    lastAttemptDirectDriverValue = None
+    lastAttemptAccumulatorInitialized = true
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in initializeLastAttemptAccumulator",
+        exception = Some(e))
+  }
+
+  private def accumulatorId: Long = {
+    // This can throw if this is a copy/serialized accumulator,
+    // not the instance registered with AccumulatorContext.
+    // Catch it so we can safely use it for logging in unexpected situations.
+    try {
+      this.id
+    } catch {
+      case NonFatal(e) =>
+        logWarning(log"Unexpected exception in getting accumulator id", e)
+        -1L // needs to be a long for LogKeys.ACCUMULATOR_ID
+    }
+  }
+
+  /** Log entry to log debug information about the internal state of the accumulator. */
+  def logAccumulatorState: LogEntry = try {
+    log"""LastAttemptAccumulator id=${MDC(LogKeys.ACCUMULATOR_ID, accumulatorId)}:
+    |Invalidated: ${MDC(LogKeys.LAST_ATTEMPT_ACC_INVALIDATE, lastAttemptAccumulatorInvalid)}.
+    |Direct driver value: ${MDC(logKeyAccumulatorState, lastAttemptDirectDriverValue)}.
+    |Value: ${MDC(logKeyAccumulatorState, value)}.
+    |lastAttemptRddsMap:
+    |${MDC(logKeyAccumulatorState, lastAttemptRddsMap)}."""
+      .stripMargin
+  } catch {
+    case NonFatal(e) =>
+      logWarning(log"Unexpected exception in logAccumulatorState", e)
+      log"<Unexpected exception in logAccumulatorState>"
+  }
+
+  private def logAccumulatorUpdate(
+      newAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None,
+      oldAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None): LogEntry = try {
+    log"""Old partial RDD value: ${MDC(logKeyAccumulatorState, oldAccumPartialValue)}.
+    |New partial RDD value: ${MDC(logKeyAccumulatorState, newAccumPartialValue)}."""
+      .stripMargin
+  } catch {
+    case NonFatal(e) =>
+      logWarning(log"Unexpected exception in logAccumulatorUpdate", e)
+      log"<Unexpected exception in logAccumulatorUpdate>"
+  }
+
+  private def unexpectedLastAttemptMetricUpdate(
+      invalidate: Boolean,
+      reason: String,
+      exception: Option[Throwable] = None,
+      newAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None,
+      oldAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None): Unit = {
+    val logEntry =
+      log"""Unexpected last attempt tracking for accumulator ${
+        MDC(LogKeys.ACCUMULATOR_ID, accumulatorId)}.
+      |Invalidate: ${MDC(LogKeys.LAST_ATTEMPT_ACC_INVALIDATE, invalidate)}.
+      |Reason: ${MDC(LogKeys.LAST_ATTEMPT_ACC_UNEXPECTED_REASON, reason)}.
+      |""".stripMargin +
+      log"State:\n" + logAccumulatorState +
+      log"Update:\n" + logAccumulatorUpdate(newAccumPartialValue, oldAccumPartialValue)
+    exception match {
+      case Some(e) => logWarning(logEntry, e)
+      case None => logWarning(logEntry)
+    }
+    if (invalidate) {
+      lastAttemptAccumulatorInvalid = true
+    }
+    if (Utils.isTesting && lastAttemptAccumulatorInitialized && exception.isDefined) {
+      // If this is a test, rethrow the exception.
+      // (Rethrow only if lastAttemptAccumulatorInitialized. In some tests, we check for proper
+      // graceful handling of unexpected exceptions in accumulators that are not properly
+      // initialized, so we don't want to throw there.)
+      throw exception.get
+    }
+  }
+
+  protected def unexpectedLastAttemptMetricOperation(
+      invalidate: Boolean,
+      reason: String,
+      exception: Option[Throwable] = None): Unit = {
+    // subclasses don't have visibility of private class AccumulatorPartialVal.
+    unexpectedLastAttemptMetricUpdate(
+      invalidate = invalidate,
+      reason = reason,
+      exception = exception,
+      newAccumPartialValue = None,
+      oldAccumPartialValue = None)
+  }
+
+  /** Set of assertions that should always hold for a valid [[LastAttemptAccumulator]]. */
+  protected def assertValid(): Unit = {
+    assert(lastAttemptAccumulatorInitialized)
+    assert(!lastAttemptAccumulatorInvalid)
+    assert(isAtDriverSide)
+    assert(metadata != null)
+    assert(!metadata.countFailedValues)
+    assert(lastAttemptDirectDriverValue.isEmpty || lastAttemptRddsMap.isEmpty)
+  }
+
+  /**
+   * Accumulator subclasses where metric values can contain user data (for example, maximum of
+   * processed values, observable metrics) as opposed to system measurements (for example, count
+   * of processed rows) should return true to ensure correct structured logging annotation.
+   */
+  protected def accumulatorStoresUserData: Boolean
+
+  protected def logKeyAccumulatorState: LogKey = {
+    if (accumulatorStoresUserData) {
+      LogKeys.LAST_ATTEMPT_ACC_USER_METRIC
+    } else {
+      LogKeys.LAST_ATTEMPT_ACC_SYSTEM_METRIC
+    }
+  }
+
+  /** Return intermediate value of PARTIAL type that can be merged together by partialMerge. */
+  protected def partialMergeVal: PARTIAL
+
+  /** Merge together partial values of PARTIAL type returned by partialMergeVal. */
+  protected def partialMerge(otherVal: PARTIAL): Unit
+
+  /** Check if the other accumulator is mergeable with this one. */
+  protected def isMergeable(other: AccumulatorV2[_, _]): Boolean
+
+  /**
+   * Check if the value is set on the driver side, not from within a task.
+   * This must be called from `add` and `set` methods of any AccumulatorV2 subclass supporting
+   * last attempt metrics to set what the `value` of the metric is after the operation.
+   */
+  protected def setValueIfOnDriverSide(value: OUT): Unit = try {
+    if (isAtDriverSide && lastAttemptAccumulatorInitialized && !lastAttemptAccumulatorInvalid) {
+      // Direct update on the driver, not from within a task.
+      // This gives little information about the source of the update, so we can't reason about
+      // "last attempt" if it's mixed with non-driver updates.
+      lastAttemptDirectDriverValue = Some(value)
+      if (lastAttemptRddsMap.nonEmpty) {
+        unexpectedLastAttemptMetricUpdate(
+          invalidate = true,
+          reason = "Incoming direct driver value while task updates exist")
+      }
+    }
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in setValueIfOnDriverSide",
+        exception = Some(e))
+  }
+
+  /**
+   * It needs Task and Stage information to reason about the last attempt.
+   *
+   * Called from a single thread in DAGScheduler, no synchronization needed.
+   * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that
+   * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+   */
+  private[spark] def mergeLastAttempt(
+      other: AccumulatorV2[_, _],
+      rdd: RDD[_],
+      taskInfo: TaskInfo,
+      stageId: Int,
+      stageAttemptId: Int,
+      localProperties: java.util.Properties): Unit = try {
+    implicit val ct: ClassTag[PARTIAL] = partialClassTag
+    if (lastAttemptAccumulatorInvalid) return
+    // Skip zero-value updates. They contribute nothing to the aggregate and can come
+    // from stages where the accumulator was present in the task closure but never incremented.
+    if (other.isZero) return
+    assertValid()
+
+    if (!isMergeable(other)) {
+      // This should never happen.
+      unexpectedLastAttemptMetricUpdate(
+        invalidate = true,
+        "Merging accumulators of different types")
+      return
+    }
+
+    if (!other.isInstanceOf[LastAttemptAccumulator[_, _, _]]) {
+      // This should never happen.
+      unexpectedLastAttemptMetricUpdate(
+        invalidate = true,
+        "Merging with accumulator which is not SLAM")
+      return
+    }
+    val lastAttemptOther = other
+      .asInstanceOf[LastAttemptAccumulator[IN, OUT, PARTIAL]]
+
+    val update = AccumulatorPartialVal(
+      partialMergeVal = lastAttemptOther.partialMergeVal,
+      rddId = rdd.id,
+      rddPartitionId = taskInfo.partitionId,
+      rddNumPartitions = rdd.getNumPartitions,
+      rddScopeId = rdd.scope.map(_.id),
+      stageId = stageId,
+      stageAttemptId = stageAttemptId,
+      taskAttemptNumber = taskInfo.attemptNumber,
+      sqlExecutionId =
+        Option(localProperties.getProperty(SparkContext.SQL_EXECUTION_ID_KEY)).map(_.toLong))
+
+    if (lastAttemptDirectDriverValue.nonEmpty) {
+      unexpectedLastAttemptMetricUpdate(invalidate = true,
+        "Incoming task updates while direct driver value exists",
+        newAccumPartialValue = Some(update))
+      return
+    }
+
+    lastAttemptRddsMap.get(update.rddId) match {
+      case Some(oldRDDValue) => // This RDD was already seen.
+        val oldValue = oldRDDValue.partialValueAt(update.rddPartitionId)
+
+        logTrace(log"mergeLastAttempt existing RDD update:\n" +
+          log"${MDC(logKeyAccumulatorState, oldRDDValue)}\n" +
+          logAccumulatorUpdate(
+          newAccumPartialValue = Some(update), oldAccumPartialValue = Some(oldValue)))
+
+        // Check basic consistency
+        if (oldValue.rddNumPartitions != update.rddNumPartitions) {
+          unexpectedLastAttemptMetricUpdate(
+            invalidate = true,
+            reason = "RDD with changing number of partitions",
+            newAccumPartialValue = Some(update),
+            oldAccumPartialValue = Some(oldValue))
+          return
+        }
+        if (oldValue.rddScopeId != update.rddScopeId) {
+          unexpectedLastAttemptMetricUpdate(
+            invalidate = true,
+            reason = "RDD with changing RDDOperationScope",
+            newAccumPartialValue = Some(update),
+            oldAccumPartialValue = Some(oldValue))
+          return
+        }
+
+        if (oldRDDValue.isEmptyAt(update.rddPartitionId)) {
+          // No previous attempt for this RDD partition.
+          oldRDDValue.update(update)
+        } else {
+          if (update.attempt > oldValue.attempt) {
+            // New last attempt for this RDD partition.
+            oldRDDValue.update(update)
+          } else if (update.attempt == oldValue.attempt) {
+            // Same attempt, should not happen.
+            unexpectedLastAttemptMetricUpdate(
+              invalidate = true,
+              reason = "Same stage, stageAttemptId and taskAttemptNumber reported multiple times",
+              newAccumPartialValue = Some(update),
+              oldAccumPartialValue = Some(oldValue))
+          }
+          // else: Older attempt reported after newer attempt. Not fatal, discard it.
+        }
+
+      case None => // First time we see this RDD.
+        logTrace(log"mergeLastAttempt new RDD update:\n" + logAccumulatorUpdate(
+          newAccumPartialValue = Some(update), oldAccumPartialValue = None))
+        val newVal = LastAttemptRDDVals.createFromFirstUpdate(update)
+        lastAttemptRddsMap.put(update.rddId, newVal)
+    }
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricUpdate(
+        invalidate = true,
+        reason = "Unexpected exception in mergeLastAttempt",
+        exception = Some(e))
+  }
+
+  /** Accumulates last attempt values from given RDD into an acc. */
+  private def lastAttemptValueAggregateInternal(rddId: Int, acc: this.type) = {
+    // Note: even if the given RDD is not present, we can't tell if it executed but just never
+    // updated this accumulator, so we still report the zero value back.
+    for {
+      lastAttemptVal <- lastAttemptRddsMap.get(rddId)
+      partitionId <- lastAttemptVal.partitionPartialVals.indices
+    } {
+      // Some partitions may not be computed.
+      // May be because of operations like take.
+      // May be because of AQE coalescing executing tasks covering multiple partitions.
+      if (!lastAttemptVal.isEmptyAt(partitionId)) {
+        acc.partialMerge(lastAttemptVal.partitionPartialVals(partitionId))
+      }
+    }
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from a set of RDDs.
+   *
+   * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that
+   * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForRDDIds(rddIds: Seq[Int]): Option[OUT] = try {
+    if (lastAttemptAccumulatorInvalid) return None
+    assertValid()
+    if (lastAttemptDirectDriverValue.isDefined) {
+      // return zero value if there is no RDD execution recorded.
+      return Some(copyAndReset().asInstanceOf[this.type].value)
+    }
+
+    val acc = copyAndReset().asInstanceOf[this.type]
+    rddIds.distinct.foreach(lastAttemptValueAggregateInternal(_, acc))
+    Some(acc.value)
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in lastAttemptValueForRDDs",
+        exception = Some(e))
+      None
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from a specific RDD.
+   *
+   * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that
+   * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForRDDId(rddId: Int): Option[OUT] = try {
+    lastAttemptValueForRDDIds(Seq(rddId))
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in lastAttemptValueForRDD",
+        exception = Some(e))
+      None
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from all RDDs that ever
+   * returned any values for it.
+   *
+   * If the metric was used directly on the driver, and was not used in any RDD execution,
+   * the driver value will be used instead.
+   *
+   * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that
+   * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForAllRDDs(): Option[OUT] = try {
+    if (lastAttemptAccumulatorInvalid) return None
+    assertValid()
+    if (lastAttemptDirectDriverValue.isDefined) return lastAttemptDirectDriverValue
+    lastAttemptValueForRDDIds(lastAttemptRddsMap.keys.toSeq)
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in lastAttemptValueForAllRDDs",
+        exception = Some(e))
+      None
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from the RDD with the highest
+   * id that ever returned any values for it.
+   *
+   * If the metric was used directly on the driver, and was not used in any RDD execution,
+   * the driver value will be used instead.
+   *
+   * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that
+   * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForHighestRDDId(): Option[OUT] = try {
+    if (lastAttemptAccumulatorInvalid) return None
+    assertValid()
+    if (lastAttemptDirectDriverValue.isDefined) return lastAttemptDirectDriverValue
+
+    if (lastAttemptRddsMap.nonEmpty) {
+      lastAttemptValueForRDDId(lastAttemptRddsMap.keys.max)
+    } else {
+      // return zero value if there is no RDD execution recorded.
+      Some(copyAndReset().asInstanceOf[this.type].value)
+    }
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in lastAttemptValueForHighestRDDId",
+        exception = Some(e))
+      None
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from RDDs with given scope ids.
+   *
+   * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that
+   * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForRDDScopes(rddScopeIds: Seq[String]): Option[OUT] = try {
+    if (lastAttemptAccumulatorInvalid) return None
+    assertValid()
+    if (lastAttemptDirectDriverValue.isDefined) {
+      // Return zero value if there is no RDD execution recorded.
+      return Some(copyAndReset().asInstanceOf[this.type].value)
+    }
+    val scopesLookup = rddScopeIds.toSet
+    val matchingRDDs = lastAttemptRddsMap.values.filter { rddVal =>
+      rddVal.rddScopeId.exists(scopesLookup.contains)
+    }.toSeq
+    // When multiple RDDs share the same scope (e.g. repeated Dataset.collect() calls create
+    // new wrapper RDDs in the same scope, or BroadcastNestedLoopJoin executing the probe side
+    // twice), only aggregate the latest one per scope, identified by the highest RDD id.
+    // RDD ids are globally monotonic, so the highest id is the latest.
+    val rddIds = matchingRDDs.groupBy(_.rddScopeId).values.map(_.maxBy(_.rddId).rddId).toSeq
+    lastAttemptValueForRDDIds(rddIds)
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in lastAttemptValueForRDDScopes",
+        exception = Some(e))
+      None
+  }
+
+  /** Visible for testing. */
+  def getDirectDriverValue: Option[OUT] = {
+    lastAttemptDirectDriverValue
+  }
+
+  /** Visible for testing */
+  def getHighestRDDId: Option[Int] = {
+    if (lastAttemptRddsMap.nonEmpty) Some(lastAttemptRddsMap.keys.max) else None
+  }
+
+  /** Visible for testing */
+  def getNumRDDs: Int = {
+    lastAttemptRddsMap.keys.size
+  }
+
+  /** Visible for testing */
+  def getValid: Boolean = {
+    !lastAttemptAccumulatorInvalid
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
index d22e14d992655..54f0ec6505b81 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
@@ -27,7 +27,7 @@ import scala.util.control.NonFatal
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder
 
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkThrowable}
 
 private[spark] object ThreadUtils {
 
@@ -358,10 +358,26 @@ private[spark] object ThreadUtils {
   def awaitResult[T](awaitable: Awaitable[T], atMost: Duration): T = {
     SparkThreadUtils.awaitResult(awaitable, atMost)
   }
+
+  @throws(classOf[SparkException])
+  def awaitResult[T](
+      awaitable: Awaitable[T],
+      atMost: Duration,
+      preserveSparkThrowable: Boolean): T = {
+    SparkThreadUtils.awaitResult(awaitable, atMost, preserveSparkThrowable)
+  }
   // scalastyle:on awaitresult
 
   @throws(classOf[SparkException])
   def awaitResult[T](future: JFuture[T], atMost: Duration): T = {
+    awaitResult(future, atMost, preserveSparkThrowable = false)
+  }
+
+  @throws(classOf[SparkException])
+  def awaitResult[T](
+      future: JFuture[T],
+      atMost: Duration,
+      preserveSparkThrowable: Boolean): T = {
     try {
       atMost match {
         case Duration.Inf => future.get()
@@ -370,6 +386,16 @@ private[spark] object ThreadUtils {
     } catch {
       case e: SparkFatalException =>
         throw e.throwable
+      // JFuture.get() wraps exceptions in ExecutionException. Unwrap and check if the
+      // cause carries a structured condition (SparkThrowable) to preserve the SQL state.
+      case e: ExecutionException
+          if preserveSparkThrowable
+            && e.getCause.isInstanceOf[SparkThrowable]
+            && e.getCause.asInstanceOf[SparkThrowable].getCondition != null =>
+        // Attach the caller's stack trace so it's not lost when re-throwing from a worker thread.
+        e.getCause.addSuppressed(
+          new SparkException("Exception thrown in awaitResult", cause = null))
+        throw e.getCause
       case NonFatal(t)
         if !t.isInstanceOf[TimeoutException] =>
         throw new SparkException("Exception thrown in awaitResult: ", t)
@@ -407,6 +433,11 @@ private[spark] object ThreadUtils {
     }
   }
 
+  /** See the overloaded [[parmap]] for full documentation. */
+  def parmap[I, O](in: Seq[I], prefix: String, maxThreads: Int)(f: I => O): Seq[O] = {
+    parmap(in, prefix, maxThreads, preserveSparkThrowable = false)(f)
+  }
+
   /**
    * Transforms input collection by applying the given function to each element in parallel fashion.
    * Comparing to the map() method of Scala parallel collections, this method can be interrupted
@@ -419,13 +450,19 @@ private[spark] object ThreadUtils {
    * @param in - the input collection which should be transformed in parallel.
    * @param prefix - the prefix assigned to the underlying thread pool.
    * @param maxThreads - maximum number of thread can be created during execution.
+   * @param preserveSparkThrowable if true, re-throw exceptions that already carry a structured
+   *   error class (SparkThrowable) instead of wrapping them in a generic SparkException.
    * @param f - the lambda function will be applied to each element of `in`.
    * @tparam I - the type of elements in the input collection.
    * @tparam O - the type of elements in resulted collection.
    * @return new collection in which each element was given from the input collection `in` by
    *         applying the lambda function `f`.
    */
-  def parmap[I, O](in: Seq[I], prefix: String, maxThreads: Int)(f: I => O): Seq[O] = {
+  def parmap[I, O](
+      in: Seq[I],
+      prefix: String,
+      maxThreads: Int,
+      preserveSparkThrowable: Boolean)(f: I => O): Seq[O] = {
     val pool = newForkJoinPool(prefix, maxThreads)
     try {
       implicit val ec: ExecutionContextExecutor = ExecutionContext.fromExecutor(pool)
@@ -433,7 +470,7 @@ private[spark] object ThreadUtils {
       val futures = in.map(x => Future(f(x)))
       val futureSeq = Future.sequence(futures)
 
-      awaitResult(futureSeq, Duration.Inf)
+      awaitResult(futureSeq, Duration.Inf, preserveSparkThrowable)
     } finally {
       pool.shutdownNow()
     }
diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java
index 9ce43d32c1b10..d59bcfc2bd131 100644
--- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java
+++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java
@@ -20,6 +20,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.BitSet;
 import java.util.LinkedList;
 import java.util.UUID;
 
@@ -36,6 +37,8 @@
 import org.apache.spark.executor.ShuffleWriteMetrics;
 import org.apache.spark.executor.TaskMetrics;
 import org.apache.spark.internal.config.package$;
+import org.apache.spark.memory.MemoryConsumer;
+import org.apache.spark.memory.MemoryMode;
 import org.apache.spark.memory.TestMemoryManager;
 import org.apache.spark.memory.TaskMemoryManager;
 import org.apache.spark.serializer.JavaSerializer;
@@ -852,6 +855,86 @@ public void testBoundedMergeWithInterleavedData() throws Exception {
     assertSpillFilesWereCleanedUp();
   }
 
+  @Test
+  public void testBoundedMergeSnapshotIsolatedFromConcurrentSpill() throws Exception {
+    // Verifies the prepareBoundedMerge() seam contract: ctx.snapshot is a defensive
+    // copy frozen at prepare-time, isolated from any later mutation of the live
+    // spillWriters list. The test drives the worst-case scenario by direct sequencing:
+    // an external-trigger spill() (the route a sibling MemoryConsumer takes under
+    // memory pressure) appends a writer to live spillWriters AND rebinds
+    // readingIterator.upstream to read it -- the merger must consume that file exactly
+    // once via readingIterator, not twice via the snapshot.
+    final UnsafeExternalSorter sorter = newSorter();
+    sorter.setSpillMergeFactor(2);
+
+    final int numSpills = 4;
+    final int recordsPerSpill = 8;
+    final int totalSpilled = numSpills * recordsPerSpill;
+    final int inMemRecords = 5;
+    final int totalRecords = totalSpilled + inMemRecords;
+
+    // Build numSpills spills with disjoint, interleaved keys.
+    for (int spill = 0; spill < numSpills; spill++) {
+      for (int j = 0; j < recordsPerSpill; j++) {
+        insertNumber(sorter, spill + j * numSpills);
+      }
+      sorter.spill();
+    }
+    // Leave a few records in memory so readingIterator has unread data that a
+    // concurrent spill() can drain into a new spill file.
+    for (int j = 0; j < inMemRecords; j++) {
+      insertNumber(sorter, totalSpilled + j);
+    }
+
+    // Phase 1: snapshot + publish readingIterator (production order).
+    UnsafeExternalSorter.BoundedMergerContext ctx = sorter.prepareBoundedMerge();
+    assertNotNull(ctx.inMemIter,
+        "readingIterator should be published when inMemSorter has data");
+    final int snapshotSizeBefore = ctx.snapshot.size();
+    final int spillFilesBefore = spillFilesCreated.size();
+
+    // Phase 2: external-trigger spill. Routes through readingIterator.spill():
+    // appends a writer to the live spillWriters AND rebinds readingIterator.upstream.
+    final MemoryConsumer externalTrigger =
+        new MemoryConsumer(taskMemoryManager, MemoryMode.ON_HEAP) {
+          @Override
+          public long spill(long size, MemoryConsumer trigger) {
+            return 0;
+          }
+        };
+    long bytesSpilled = sorter.spill(Long.MAX_VALUE, externalTrigger);
+    assertTrue(bytesSpilled > 0L,
+        "external-trigger spill must fire to exercise the seam contract");
+    // Exactly one new spill file should have been produced by the external-trigger spill.
+    assertEquals(spillFilesBefore + 1, spillFilesCreated.size(),
+        "external-trigger spill should produce exactly one new spill file");
+    // Defensive-copy invariant: the post-spill snapshot is unchanged. A future
+    // refactor that aliases ctx.snapshot to the live spillWriters field instead of
+    // copying it would fail this assertion.
+    assertEquals(snapshotSizeBefore, ctx.snapshot.size(),
+        "ctx.snapshot must be isolated from live spillWriters mutation");
+
+    // Phase 3: merge using the frozen snapshot.
+    UnsafeSorterIterator iter = ctx.merger.merge(ctx.snapshot, ctx.inMemIter);
+
+    // Each input record must appear exactly once: no duplicates, no losses.
+    BitSet seen = new BitSet(totalRecords);
+    int count = 0;
+    while (iter.hasNext()) {
+      iter.loadNext();
+      int v = Platform.getInt(iter.getBaseObject(), iter.getBaseOffset());
+      assertTrue(v >= 0 && v < totalRecords, "record out of range: " + v);
+      assertFalse(seen.get(v), "duplicate record observed: " + v);
+      seen.set(v);
+      count++;
+    }
+    assertEquals(totalRecords, count, "wrong record count");
+    assertEquals(totalRecords, seen.cardinality(), "missing records");
+
+    sorter.cleanupResources();
+    assertSpillFilesWereCleanedUp();
+  }
+
   @Test
   public void testBoundedMergeWithDuplicateKeys() throws Exception {
     // Multiple spills contain identical keys. Verifies that all duplicates are
diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala
index 05709c9bdd756..813de4132ab2d 100644
--- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala
@@ -17,9 +17,6 @@
 
 package org.apache.spark
 
-import java.lang.ref.WeakReference
-import java.util.concurrent.TimeUnit
-
 import scala.collection.mutable.HashSet
 import scala.util.Random
 
@@ -96,18 +93,6 @@ abstract class ContextCleanerSuiteBase(val shuffleManager: Class[_] = classOf[So
     rdd
   }
 
-  /** Run GC and make sure it actually has run */
-  protected def runGC(): Unit = {
-    val weakRef = new WeakReference(new Object())
-    val startTimeNs = System.nanoTime()
-    System.gc() // Make a best effort to run the garbage collection. It *usually* runs GC.
-    // Wait until a weak reference object has been GCed
-    while (System.nanoTime() - startTimeNs < TimeUnit.SECONDS.toNanos(10) && weakRef.get != null) {
-      System.gc()
-      Thread.sleep(200)
-    }
-  }
-
   protected def cleaner = sc.cleaner.get
 }
 
diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
index 15e150ab8b933..a0f17f8af3f33 100644
--- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
@@ -17,6 +17,9 @@
 
 package org.apache.spark
 
+import java.lang.ref.WeakReference
+import java.util.concurrent.TimeUnit
+
 import scala.annotation.tailrec
 
 import org.scalactic.source.Position
@@ -97,4 +100,26 @@ abstract class SparkFunSuite
       test(testNamePrefix + s" ${param._1}", testTags: _*)(testFun(param._2))
     }
   }
+
+  /** Run GC and make sure it actually has run. */
+  protected def runGC(): Unit = {
+    val weakRef = new WeakReference(new Object())
+    val startTimeNs = System.nanoTime()
+    System.gc() // Make a best effort to run the garbage collection. It *usually* runs GC.
+    // Wait until a weak reference object has been GCed
+    while (System.nanoTime() - startTimeNs < TimeUnit.SECONDS.toNanos(10) && weakRef.get != null) {
+      System.gc()
+      Thread.sleep(200)
+    }
+  }
+
+  /** Run `body`; if it throws OutOfMemoryError, force a GC and retry once. */
+  protected def retryOnOOM[T](body: => T): T = {
+    try body
+    catch {
+      case _: OutOfMemoryError =>
+        runGC()
+        body
+    }
+  }
 }
diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala
index 518a8c8b3d055..6d2c663a2588e 100644
--- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala
@@ -23,10 +23,11 @@ import com.fasterxml.jackson.core.JsonParseException
 import org.json4s._
 import org.json4s.jackson.JsonMethods
 
-import org.apache.spark.{JsonTestUtils, SparkFunSuite}
+import org.apache.spark.{JsonTestUtils, SparkConf, SparkFunSuite}
 import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse}
 import org.apache.spark.deploy.master.{ApplicationInfo, RecoveryState, WorkerInfo}
 import org.apache.spark.deploy.worker.ExecutorRunner
+import org.apache.spark.util.Utils
 
 class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils {
 
@@ -45,7 +46,7 @@ class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils {
   }
 
   test("writeApplicationDescription") {
-    val output = JsonProtocol.writeApplicationDescription(createAppDesc())
+    val output = JsonProtocol.writeApplicationDescription(createAppDesc(), new SparkConf())
     assertValidJson(output)
     assertValidDataInJson(output, JsonMethods.parse(JsonConstants.appDescJsonStr))
   }
@@ -105,6 +106,38 @@ class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils {
     assertValidDataInJson(output, JsonMethods.parse(JsonConstants.workerStateJsonStr))
   }
 
+  test("SPARK-57098: secrets in executor command are redacted in worker JSON endpoint") {
+    val conf = new SparkConf()
+    val secretEnv = Map(
+      "HADOOP_CREDSTORE_PASSWORD" -> "topsecret",
+      "JAVA_HOME" -> "/usr/lib/jvm/default",
+      "AWS_SECRET_ACCESS_KEY" -> "aws-secret-value")
+    val secretJavaOpts = Seq(
+      "-Dspark.ssl.keyStorePassword=ssl-secret",
+      "-Dspark.executorEnv.PASSWORD=env-secret",
+      "-Xmx2g")
+    val cmd = new Command(
+      "mainClass", List("arg1"), secretEnv, Seq(), Seq(), secretJavaOpts)
+    val appDesc = new ApplicationDescription(
+      "name", Some(4), cmd, "appUiUrl", defaultResourceProfile)
+
+    val output = JsonProtocol.writeApplicationDescription(appDesc, conf)
+    val commandStr = (output \ "command") match {
+      case JString(s) => s
+      case other => fail(s"Expected JString for 'command', got: $other")
+    }
+
+    // Sensitive values are scrubbed.
+    assert(!commandStr.contains("topsecret"))
+    assert(!commandStr.contains("ssl-secret"))
+    assert(!commandStr.contains("env-secret"))
+    assert(!commandStr.contains("aws-secret-value"))
+    assert(commandStr.contains(Utils.REDACTION_REPLACEMENT_TEXT))
+    // Non-sensitive values pass through.
+    assert(commandStr.contains("/usr/lib/jvm/default"))
+    assert(commandStr.contains("-Xmx2g"))
+  }
+
   test("SPARK-46883: writeClusterUtilization") {
     val workers = Array(createWorkerInfo(), createWorkerInfo())
     val activeApps = Array(createAppInfo())
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index cb1906679e550..829010179bda4 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -212,6 +212,10 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P
       SparkListenerApplicationEnd(2L)
       )
     logFile2.setReadable(false, false)
+    // setReadable(false) is a no-op for root users since they bypass file
+    // permission checks. Skip the test in that case.
+    assume(!logFile2.canRead, "Test requires the file to be unreadable; " +
+      "skipping when running as root.")
 
     updateAndCheck(provider) { list =>
       list.size should be (1)
diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
index d6f0bfd237e4d..d153800acf3ff 100644
--- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
+++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
@@ -21,7 +21,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
 import java.util.Locale
 
 import org.apache.spark.{SparkConf, SparkFunSuite, SparkIllegalArgumentException}
-import org.apache.spark.internal.config.IO_COMPRESSION_ZSTD_BUFFERPOOL_ENABLED
+import org.apache.spark.internal.config.{IO_COMPRESSION_CODEC, IO_COMPRESSION_ZSTD_BUFFERPOOL_ENABLED}
 import org.apache.spark.util.Utils
 
 class CompressionCodecSuite extends SparkFunSuite {
@@ -47,7 +47,9 @@ class CompressionCodecSuite extends SparkFunSuite {
 
   test("default compression codec") {
     val codec = CompressionCodec.createCodec(conf)
-    assert(codec.getClass === classOf[LZ4CompressionCodec])
+    assert(codec.getClass.getName ===
+      CompressionCodec.shortCompressionCodecNames(
+        IO_COMPRESSION_CODEC.defaultValueString))
     testCodec(codec)
   }
 
diff --git a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala
index d74bc26999447..59c0a5085e93c 100644
--- a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala
@@ -26,7 +26,7 @@ import scala.util.Random
 
 import org.scalatest.concurrent.Eventually._
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkException, SparkFunSuite, SparkThrowable}
 
 class ThreadUtilsSuite extends SparkFunSuite {
 
@@ -229,4 +229,90 @@ class ThreadUtilsSuite extends SparkFunSuite {
       assert(!t.isAlive)
     }
   }
+
+  test("awaitResult preserves SparkThrowable when flag is true") {
+    import java.io.IOException
+
+    val sparkThrowableEx = new RuntimeException("structured error") with SparkThrowable {
+      override def getCondition: String = "TEST_ERROR_CLASS"
+      override def getMessageParameters: java.util.Map[String, String] =
+        java.util.Collections.emptyMap()
+    }
+
+    // With preserveSparkThrowable=true, SparkThrowable is re-thrown directly.
+    val f1 = Future {
+      throw sparkThrowableEx
+    }(ThreadUtils.sameThread)
+    val caught1 = intercept[RuntimeException] {
+      ThreadUtils.awaitResult(f1, 1.seconds, preserveSparkThrowable = true)
+    }
+    assert(caught1.isInstanceOf[SparkThrowable])
+    assert(caught1.asInstanceOf[SparkThrowable].getCondition == "TEST_ERROR_CLASS")
+    assert(caught1.getSuppressed.nonEmpty)
+
+    // With preserveSparkThrowable=false (default), SparkThrowable is wrapped in SparkException.
+    val f2 = Future {
+      throw sparkThrowableEx
+    }(ThreadUtils.sameThread)
+    val caught2 = intercept[SparkException] {
+      ThreadUtils.awaitResult(f2, 1.seconds)
+    }
+    assert(caught2.getCause.isInstanceOf[SparkThrowable])
+
+    // Plain exceptions are always wrapped regardless of the flag.
+    val plainEx = new IOException("plain error")
+    val f3 = Future {
+      throw plainEx
+    }(ThreadUtils.sameThread)
+    val caught3 = intercept[SparkException] {
+      ThreadUtils.awaitResult(f3, 1.seconds, preserveSparkThrowable = true)
+    }
+    assert(caught3.getCause eq plainEx)
+  }
+
+  test("awaitResult (JFuture) preserves SparkThrowable when flag is true") {
+    val sparkThrowableEx = new RuntimeException("structured error") with SparkThrowable {
+      override def getCondition: String = "TEST_ERROR_CLASS"
+      override def getMessageParameters: java.util.Map[String, String] =
+        java.util.Collections.emptyMap()
+    }
+
+    // scalastyle:off sparkThreadPools
+    val jfuture = new java.util.concurrent.CompletableFuture[String]()
+    // scalastyle:on sparkThreadPools
+    jfuture.completeExceptionally(sparkThrowableEx)
+
+    val caught = intercept[RuntimeException] {
+      ThreadUtils.awaitResult(jfuture, 10.seconds, preserveSparkThrowable = true)
+    }
+    assert(caught.isInstanceOf[SparkThrowable])
+    assert(caught.asInstanceOf[SparkThrowable].getCondition == "TEST_ERROR_CLASS")
+    assert(caught.getSuppressed.nonEmpty)
+  }
+
+  test("parmap preserves SparkThrowable when flag is true") {
+    val sparkThrowableEx = new RuntimeException("structured error") with SparkThrowable {
+      override def getCondition: String = "TEST_ERROR_CLASS"
+      override def getMessageParameters: java.util.Map[String, String] =
+        java.util.Collections.emptyMap()
+    }
+
+    // With preserveSparkThrowable=true, the original SparkThrowable is re-thrown.
+    val caught1 = intercept[RuntimeException] {
+      ThreadUtils.parmap(Seq(1), "test", 1, preserveSparkThrowable = true) { _ =>
+        throw sparkThrowableEx
+      }
+    }
+    assert(caught1.isInstanceOf[SparkThrowable])
+    assert(caught1.asInstanceOf[SparkThrowable].getCondition == "TEST_ERROR_CLASS")
+    assert(caught1.getSuppressed.nonEmpty)
+
+    // With preserveSparkThrowable=false, it is wrapped in SparkException.
+    val caught2 = intercept[SparkException] {
+      ThreadUtils.parmap(Seq(1), "test", 1, preserveSparkThrowable = false) { _ =>
+        throw sparkThrowableEx
+      }
+    }
+    assert(caught2.getCause.isInstanceOf[SparkThrowable])
+  }
 }
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index e87f3ad026491..4bb46959cef89 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -527,18 +527,24 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties {
     val scenario4 = new File(testDir, "scenario4")
     assert(testDir.canWrite)
     assert(testDir.setWritable(false))
-    assert(!Utils.createDirectory(scenario4))
-    assert(!scenario4.exists())
-    assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario4"))
+    // Skip when write permission cannot actually be revoked (e.g., running as root).
+    if (!testDir.canWrite) {
+      assert(!Utils.createDirectory(scenario4))
+      assert(!scenario4.exists())
+      assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario4"))
+    }
     assert(testDir.setWritable(true))
 
     // 5. The parent directory cannot execute
     val scenario5 = new File(testDir, "scenario5")
     assert(testDir.canExecute)
     assert(testDir.setExecutable(false))
-    assert(!Utils.createDirectory(scenario5))
-    assert(!scenario5.exists())
-    assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario5"))
+    // Skip when execute permission cannot actually be revoked (e.g., running as root).
+    if (!testDir.canExecute) {
+      assert(!Utils.createDirectory(scenario5))
+      assert(!scenario5.exists())
+      assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario5"))
+    }
     assert(testDir.setExecutable(true))
 
     // The following 3 scenarios are only for the method: createDirectory(File)
diff --git a/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala
index 7551327d704b4..2767769924bc8 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala
@@ -71,7 +71,6 @@ class SorterSuite extends SparkFunSuite {
   }
 
   test("java.lang.ArrayIndexOutOfBoundsException in TimSort") {
-    System.gc()
     // scalastyle:off
     val runLengths = Array(76405736, 74830360, 1181532, 787688, 1575376, 2363064, 3938440, 6301504,
       1181532, 393844, 15753760, 1575376, 787688, 393844, 1969220, 3150752, 1181532,787688, 5513816, 3938440,
@@ -140,7 +139,10 @@ class SorterSuite extends SparkFunSuite {
       21, 20, 22, 18, 452, 114, 95, 18, 17, 21, 36, 18, 17, 115, 76, 144, 44, 38, 61,20, 19, 21, 17)
     // scalastyle:on
     val arrayToSortSize = 1091482190
-    val arrayToSort = new Array[Byte](arrayToSortSize)
+    // Memory held by the previous test (e.g. the ~256 MB int array in "SPARK-5984
+    // TimSort bug") may not be reclaimed before this >1 GB allocation, causing flaky
+    // OOM in CI. Force a GC and retry once on OOM.
+    val arrayToSort = retryOnOOM(new Array[Byte](arrayToSortSize))
     var sum: Int = -1
     for (i <- runLengths) {
       sum += i
diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml
index 9925ae406dbd9..55b1ed40cb9ed 100644
--- a/dev/checkstyle-suppressions.xml
+++ b/dev/checkstyle-suppressions.xml
@@ -70,4 +70,6 @@
       files="src/test/java/org/apache/spark/util/collection/TestTimSort.java" />
     <suppress checks=".*"
               files="src/main/java/org/apache/spark/sql/execution/streaming/state/StateMessage.java"/>
+    <suppress checks=".*"
+              files="src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java"/>
 </suppressions>
diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh
index 43c198301b702..9d93e49e014e4 100755
--- a/dev/create-release/release-tag.sh
+++ b/dev/create-release/release-tag.sh
@@ -84,7 +84,6 @@ fi
 # Set the release version in docs
 sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml
 sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml
-sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml
 sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py
 
 git commit -a -m "Preparing Spark release $RELEASE_TAG"
@@ -104,8 +103,6 @@ sed -i".tmp6" 's/__version__: str = .*$/__version__: str = "'"$R_NEXT_VERSION.de
 sed -i".tmp7" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$NEXT_VERSION"'/g' docs/_config.yml
 # Use R version for short version
 sed -i".tmp8" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$R_NEXT_VERSION"'/g' docs/_config.yml
-# Update the version index of DocSearch as the short version
-sed -i".tmp9" "s/'facetFilters':.*$/'facetFilters': [\"version:$R_NEXT_VERSION\"]/g" docs/_config.yml
 
 git commit -a -m "Preparing development version $NEXT_VERSION"
 
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index aaf9679e34f61..a8a9fd81768e2 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -194,35 +194,35 @@ metrics-jmx/4.2.37//metrics-jmx-4.2.37.jar
 metrics-json/4.2.37//metrics-json-4.2.37.jar
 metrics-jvm/4.2.37//metrics-jvm-4.2.37.jar
 minlog/1.3.0//minlog-1.3.0.jar
-netty-all/4.2.12.Final//netty-all-4.2.12.Final.jar
-netty-buffer/4.2.12.Final//netty-buffer-4.2.12.Final.jar
-netty-codec-base/4.2.12.Final//netty-codec-base-4.2.12.Final.jar
-netty-codec-compression/4.2.12.Final//netty-codec-compression-4.2.12.Final.jar
-netty-codec-dns/4.2.12.Final//netty-codec-dns-4.2.12.Final.jar
-netty-codec-http/4.2.12.Final//netty-codec-http-4.2.12.Final.jar
-netty-codec-http2/4.2.12.Final//netty-codec-http2-4.2.12.Final.jar
-netty-codec-socks/4.2.12.Final//netty-codec-socks-4.2.12.Final.jar
-netty-codec/4.2.12.Final//netty-codec-4.2.12.Final.jar
-netty-common/4.2.12.Final//netty-common-4.2.12.Final.jar
-netty-handler-proxy/4.2.12.Final//netty-handler-proxy-4.2.12.Final.jar
-netty-handler/4.2.12.Final//netty-handler-4.2.12.Final.jar
-netty-resolver-dns/4.2.12.Final//netty-resolver-dns-4.2.12.Final.jar
-netty-resolver/4.2.12.Final//netty-resolver-4.2.12.Final.jar
+netty-all/4.2.13.Final//netty-all-4.2.13.Final.jar
+netty-buffer/4.2.13.Final//netty-buffer-4.2.13.Final.jar
+netty-codec-base/4.2.13.Final//netty-codec-base-4.2.13.Final.jar
+netty-codec-compression/4.2.13.Final//netty-codec-compression-4.2.13.Final.jar
+netty-codec-dns/4.2.13.Final//netty-codec-dns-4.2.13.Final.jar
+netty-codec-http/4.2.13.Final//netty-codec-http-4.2.13.Final.jar
+netty-codec-http2/4.2.13.Final//netty-codec-http2-4.2.13.Final.jar
+netty-codec-socks/4.2.13.Final//netty-codec-socks-4.2.13.Final.jar
+netty-codec/4.2.13.Final//netty-codec-4.2.13.Final.jar
+netty-common/4.2.13.Final//netty-common-4.2.13.Final.jar
+netty-handler-proxy/4.2.13.Final//netty-handler-proxy-4.2.13.Final.jar
+netty-handler/4.2.13.Final//netty-handler-4.2.13.Final.jar
+netty-resolver-dns/4.2.13.Final//netty-resolver-dns-4.2.13.Final.jar
+netty-resolver/4.2.13.Final//netty-resolver-4.2.13.Final.jar
 netty-tcnative-boringssl-static/2.0.76.Final/linux-aarch_64/netty-tcnative-boringssl-static-2.0.76.Final-linux-aarch_64.jar
 netty-tcnative-boringssl-static/2.0.76.Final/linux-x86_64/netty-tcnative-boringssl-static-2.0.76.Final-linux-x86_64.jar
 netty-tcnative-boringssl-static/2.0.76.Final/osx-aarch_64/netty-tcnative-boringssl-static-2.0.76.Final-osx-aarch_64.jar
 netty-tcnative-boringssl-static/2.0.76.Final/osx-x86_64/netty-tcnative-boringssl-static-2.0.76.Final-osx-x86_64.jar
 netty-tcnative-boringssl-static/2.0.76.Final/windows-x86_64/netty-tcnative-boringssl-static-2.0.76.Final-windows-x86_64.jar
 netty-tcnative-classes/2.0.76.Final//netty-tcnative-classes-2.0.76.Final.jar
-netty-transport-classes-epoll/4.2.12.Final//netty-transport-classes-epoll-4.2.12.Final.jar
-netty-transport-classes-kqueue/4.2.12.Final//netty-transport-classes-kqueue-4.2.12.Final.jar
-netty-transport-native-epoll/4.2.12.Final/linux-aarch_64/netty-transport-native-epoll-4.2.12.Final-linux-aarch_64.jar
-netty-transport-native-epoll/4.2.12.Final/linux-riscv64/netty-transport-native-epoll-4.2.12.Final-linux-riscv64.jar
-netty-transport-native-epoll/4.2.12.Final/linux-x86_64/netty-transport-native-epoll-4.2.12.Final-linux-x86_64.jar
-netty-transport-native-kqueue/4.2.12.Final/osx-aarch_64/netty-transport-native-kqueue-4.2.12.Final-osx-aarch_64.jar
-netty-transport-native-kqueue/4.2.12.Final/osx-x86_64/netty-transport-native-kqueue-4.2.12.Final-osx-x86_64.jar
-netty-transport-native-unix-common/4.2.12.Final//netty-transport-native-unix-common-4.2.12.Final.jar
-netty-transport/4.2.12.Final//netty-transport-4.2.12.Final.jar
+netty-transport-classes-epoll/4.2.13.Final//netty-transport-classes-epoll-4.2.13.Final.jar
+netty-transport-classes-kqueue/4.2.13.Final//netty-transport-classes-kqueue-4.2.13.Final.jar
+netty-transport-native-epoll/4.2.13.Final/linux-aarch_64/netty-transport-native-epoll-4.2.13.Final-linux-aarch_64.jar
+netty-transport-native-epoll/4.2.13.Final/linux-riscv64/netty-transport-native-epoll-4.2.13.Final-linux-riscv64.jar
+netty-transport-native-epoll/4.2.13.Final/linux-x86_64/netty-transport-native-epoll-4.2.13.Final-linux-x86_64.jar
+netty-transport-native-kqueue/4.2.13.Final/osx-aarch_64/netty-transport-native-kqueue-4.2.13.Final-osx-aarch_64.jar
+netty-transport-native-kqueue/4.2.13.Final/osx-x86_64/netty-transport-native-kqueue-4.2.13.Final-osx-x86_64.jar
+netty-transport-native-unix-common/4.2.13.Final//netty-transport-native-unix-common-4.2.13.Final.jar
+netty-transport/4.2.13.Final//netty-transport-4.2.13.Final.jar
 objenesis/3.5//objenesis-3.5.jar
 okhttp/3.12.12//okhttp-3.12.12.jar
 okio/1.17.6//okio-1.17.6.jar
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 1cfc22acc2302..57cde202dde8d 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -70,6 +70,7 @@ RUN apt-get update && apt-get install -y \
     software-properties-common \
     wget \
     zlib1g-dev \
+    zstd \
     && rm -rf /var/lib/apt/lists/*
 
 
@@ -108,7 +109,7 @@ RUN add-apt-repository ppa:deadsnakes/ppa
 RUN apt-get update && apt-get install -y \
     python3.9 python3.9-distutils \
     && rm -rf /var/lib/apt/lists/*
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
+RUN curl -sS https://bootstrap.pypa.io/pip/3.9/get-pip.py | python3.9
 RUN python3.9 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this
 RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \
     python3.9 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index a780e173adb7b..27d7728099bbc 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -32,6 +32,21 @@ set -x
 SPARK_HOME="$(cd "`dirname "$0"`/.."; pwd)"
 DISTDIR="$SPARK_HOME/dist"
 
+# The Apache LICENSE and NOTICE are copied into the Python and R package
+# directories below so they are bundled into the source distributions. Remove
+# them on exit so a failed build does not leave stray files behind.
+function cleanup_dist_license_files {
+  rm -f "$SPARK_HOME/python/LICENSE" "$SPARK_HOME/python/NOTICE" \
+        "$SPARK_HOME/R/pkg/LICENSE" "$SPARK_HOME/R/pkg/NOTICE"
+  # Restore the SparkR DESCRIPTION if a release build patched it in place (see
+  # the R packaging section). Guards against an interrupted build leaving the
+  # tracked DESCRIPTION modified.
+  if [ -f "$SPARK_HOME/R/DESCRIPTION.orig" ]; then
+    mv -f "$SPARK_HOME/R/DESCRIPTION.orig" "$SPARK_HOME/R/pkg/DESCRIPTION"
+  fi
+}
+trap cleanup_dist_license_files EXIT
+
 MAKE_TGZ=false
 MAKE_PIP=false
 MAKE_R=false
@@ -169,7 +184,6 @@ fi
 cd "$SPARK_HOME"
 
 if [ "$SBT_ENABLED" == "true" ] ; then
-  export NOLINT_ON_COMPILE=1
   # Store the command as an array because $SBT variable might have spaces in it.
   # Normal quoting tricks don't work.
   # See: http://mywiki.wooledge.org/BashFAQ/050
@@ -204,6 +218,11 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 # Copy jars
 cp -r "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
 
+# SPARK-53327: Use the modified ResourceImpl.class in spark-catalyst which is compatible with Java 25
+if [ -f "$DISTDIR"/jars/datasketches-memory-3.0.2.jar ]; then
+  zip -d "$DISTDIR"/jars/datasketches-memory-3.0.2.jar org/apache/datasketches/memory/internal/ResourceImpl.class
+fi
+
 # Only create the yarn directory if the yarn artifacts were built.
 if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
   mkdir "$DISTDIR/yarn"
@@ -255,9 +274,39 @@ if [ "$MAKE_PIP" == "true" ]; then
   pushd "$SPARK_HOME/python" > /dev/null
   # Delete the egg info file if it exists, this can cache older setup files.
   rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
+  # Ship the Apache LICENSE and NOTICE inside the PySpark source distributions
+  # (see MANIFEST.in). These are removed again after the sdists are built.
+  #
+  # The classic pyspark sdist bundles the assembly jars (packaging/classic/setup.py
+  # builds a deps/jars symlink farm), so it ships the binary LICENSE/NOTICE that
+  # enumerate the bundled third-party jars' licenses, mirroring the binary
+  # distribution above. The connect and client sdists bundle no jars and ship the
+  # plain source LICENSE/NOTICE.
+  if [ -e "$SPARK_HOME/LICENSE-binary" ]; then
+    cp "$SPARK_HOME/LICENSE-binary" LICENSE
+    cp "$SPARK_HOME/NOTICE-binary" NOTICE
+  else
+    cp "$SPARK_HOME/LICENSE" LICENSE
+    cp "$SPARK_HOME/NOTICE" NOTICE
+  fi
   python3 packaging/classic/setup.py sdist
+
+  cp "$SPARK_HOME/LICENSE" LICENSE
+  cp "$SPARK_HOME/NOTICE" NOTICE
   python3 packaging/connect/setup.py sdist
   python3 packaging/client/setup.py sdist
+  rm -f LICENSE NOTICE
+
+  # Guard against regressions: every PySpark sdist must contain LICENSE and NOTICE
+  # at the package root. The missing files were only caught by a Spark 4.2.0 RC1
+  # vote -1 (SPARK-57393); fail the release build here instead of at vote time.
+  for f in dist/pyspark*.tar.gz; do
+    listing=$(tar tzf "$f")
+    for required in LICENSE NOTICE; do
+      grep -qE "^[^/]+/$required\$" <<< "$listing" || \
+        { echo "ERROR: $f is missing $required at the package root"; exit 1; }
+    done
+  done
   popd > /dev/null
 else
   echo "Skipping building python distribution package"
@@ -268,9 +317,33 @@ if [ "$MAKE_R" == "true" ]; then
   echo "Building R source package"
   R_PACKAGE_VERSION=`grep Version "$SPARK_HOME/R/pkg/DESCRIPTION" | awk '{print $NF}'`
   pushd "$SPARK_HOME/R" > /dev/null
+  # Ship the Apache LICENSE and NOTICE inside the SparkR source package. These
+  # are removed again after the package is built.
+  cp "$SPARK_HOME/LICENSE" pkg/LICENSE
+  cp "$SPARK_HOME/NOTICE" pkg/NOTICE
+  # Reference the bundled LICENSE from DESCRIPTION so `R CMD check --as-cran` does
+  # not emit "File LICENSE is not mentioned in the DESCRIPTION file". The committed
+  # DESCRIPTION is left untouched because SparkR CI runs check-cran.sh without the
+  # LICENSE file present; this edit is transient and restored after the build (and
+  # by the EXIT trap on failure). The backup lives outside pkg/ so R CMD check does
+  # not flag it as a non-standard file. NOTE: the "Non-standard file 'NOTICE'" note
+  # cannot be silenced this way and is expected.
+  cp pkg/DESCRIPTION "$SPARK_HOME/R/DESCRIPTION.orig"
+  sed 's/^License: Apache License (== 2.0)$/License: Apache License (== 2.0) + file LICENSE/' \
+    "$SPARK_HOME/R/DESCRIPTION.orig" > pkg/DESCRIPTION
   # Build source package and run full checks
   # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME
   NO_TESTS=1 "$SPARK_HOME/R/check-cran.sh"
+  mv -f "$SPARK_HOME/R/DESCRIPTION.orig" pkg/DESCRIPTION
+  rm -f pkg/LICENSE pkg/NOTICE
+
+  # Guard against regressions: the SparkR source package must contain LICENSE and
+  # NOTICE at the package root (SPARK-57393).
+  listing=$(tar tzf "SparkR_$R_PACKAGE_VERSION.tar.gz")
+  for required in LICENSE NOTICE; do
+    grep -qE "^[^/]+/$required\$" <<< "$listing" || \
+      { echo "ERROR: SparkR source package is missing $required"; exit 1; }
+  done
 
   # Move R source package to match the Spark release version if the versions are not the same.
   # NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 0b7a90694385f..685621193dd66 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -644,7 +644,8 @@ def main():
         run_build_tests()
 
     # spark build
-    build_apache_spark(build_tool, extra_profiles)
+    if os.environ.get("SKIP_SCALA_BUILD", "false") != "true":
+        build_apache_spark(build_tool, extra_profiles)
 
     # backwards compatibility checks
     if build_tool == "sbt":
@@ -653,7 +654,8 @@ def main():
             detect_binary_inop_with_mima(extra_profiles)
         # Since we did not build assembly/package before running dev/mima, we need to
         # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
+        if os.environ.get("SKIP_SCALA_BUILD", "false") != "true":
+            build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
 
     # run the test suites
     run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
diff --git a/dev/scalastyle b/dev/scalastyle
index 0428453b62c81..09e6c2372614d 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -30,6 +30,62 @@ ERRORS=$(echo -e "q\n" \
 
 if test ! -z "$ERRORS"; then
     echo -e "Scalastyle checks failed at following occurrences:\n$ERRORS"
+    # When running under GitHub Actions, also emit each scalastyle violation as
+    # a workflow `::error` annotation so it appears inline on the PR's "Files
+    # changed" tab. Without this, a violation cascades into ~7 red CI checks
+    # (Linters, Java 17/25 Maven build, Documentation generation, sparkr,
+    # Docker integration, TPC-DS) -- all needing catalyst to compile -- and
+    # each only surfaces a generic "exit code 1" with no file/line, forcing
+    # the user to download a full job log to find the actual violation.
+    if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then
+        # Strip ANSI color codes from the captured output before regex
+        # matching. Today sbt under awk's pipe is not a TTY and skips color,
+        # so the input is already plain. But if sbt color is ever forced
+        # (`-Dsbt.color=always`, custom CI shell), `\e[31m` would silently
+        # break every regex below. Cheap to harden.
+        ERRORS_PLAIN=$(printf '%s' "$ERRORS" | sed -E $'s/\x1b\\[[0-9;]*[A-Za-z]//g')
+        # Helper: emit one `::error` annotation. Centralised so the two regex
+        # branches below stay short.
+        emit_annotation() {
+            local file="$1" lineno="$2" msg="$3"
+            # Strip the GitHub Actions workspace prefix so the annotation
+            # references the path as it appears in the repo.
+            local file_rel="${file#${GITHUB_WORKSPACE:-}/}"
+            # Escape the few characters GitHub reserves in annotation values:
+            # %, \r, \n. (`,` and `:` need not be escaped in the message body,
+            # only inside parameter values, which we don't use.)
+            local msg_escaped="${msg//%/%25}"
+            msg_escaped="${msg_escaped//$'\r'/%0D}"
+            msg_escaped="${msg_escaped//$'\n'/%0A}"
+            printf '::error file=%s,line=%s,title=Scalastyle::%s\n' \
+                "$file_rel" "$lineno" "$msg_escaped"
+        }
+        printf '%s\n' "$ERRORS_PLAIN" | while IFS= read -r raw; do
+            # Two scalastyle output formats reach us:
+            #
+            # (a) scalastyle's native console writer (`Tasks.doScalastyle` when
+            #     invoked by the explicit `scalastyle` / `test:scalastyle`
+            #     tasks):
+            #         error file=<path> message=<text> line=<n> [column=<n>]
+            #     The path has no spaces, the message can; `column=<n>` is
+            #     appended for checkers that report a column (e.g.
+            #     `WhitespaceEndOfLineChecker`) and absent otherwise.
+            #
+            # (b) sbt's logger format, used when `Tasks.doScalastyle` writes
+            #     through `streams.value.log.error(...)` -- which is what the
+            #     explicit `scalastyle` / `test:scalastyle` tasks invoked by
+            #     this script do, and so this is the format we see in CI:
+            #         [error] <path>:<line>: <message>
+            #     The leading `[error] ` plus a single `:<line>:` (with no
+            #     `:<col>:` follow-up) is what tells it apart from a regular
+            #     Scala compile error of shape `[error] <path>:<line>:<col>: <msg>`.
+            if [[ "$raw" =~ ^error[[:space:]]+file=([^[:space:]]+)[[:space:]]+message=(.*)[[:space:]]+line=([0-9]+)([[:space:]]+column=[0-9]+)?$ ]]; then
+                emit_annotation "${BASH_REMATCH[1]}" "${BASH_REMATCH[3]}" "${BASH_REMATCH[2]}"
+            elif [[ "$raw" =~ ^\[error\][[:space:]]+(/[^:[:space:]]+):([0-9]+):[[:space:]]+(.+)$ ]]; then
+                emit_annotation "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}"
+            fi
+        done
+    fi
     exit 1
 else
     echo -e "Scalastyle checks passed."
diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
index 3b02e2ae1cffe..f58168627dd12 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -70,6 +70,7 @@ RUN apt-get update && apt-get install -y \
     software-properties-common \
     wget \
     zlib1g-dev \
+    zstd \
     && apt-get autoremove --purge -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile
index 14a0e4fcfe75b..b7cecae654cb9 100644
--- a/dev/spark-test-image/lint/Dockerfile
+++ b/dev/spark-test-image/lint/Dockerfile
@@ -63,6 +63,7 @@ RUN apt-get update && apt-get install -y \
     software-properties-common \
     wget \
     zlib1g-dev \
+    zstd \
     && apt-get autoremove --purge -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -95,8 +96,8 @@ RUN python3.12 -m pip install \
     'mypy==1.19.1' \
     'numpy==2.4.1' \
     'numpydoc' \
-    'pandas' \
-    'pandas-stubs' \
+    'pandas==2.3.3' \
+    'pandas-stubs==2.3.3.260113' \
     'plotly>=4.8' \
     'pyarrow>=23.0.0' \
     'pytest-mypy-plugins==1.9.3' \
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 8e5044aeb954e..e39e9fac70d22 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
+    zlib1g-dev \
+    zstd
 
 # Install Python 3.11
 RUN add-apt-repository ppa:deadsnakes/ppa
diff --git a/dev/spark-test-image/python-312-classic-only/Dockerfile b/dev/spark-test-image/python-312-classic-only/Dockerfile
index da2b99cd7838d..ceb4694b2dc9d 100644
--- a/dev/spark-test-image/python-312-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-312-classic-only/Dockerfile
@@ -49,6 +49,7 @@ RUN apt-get update && apt-get install -y \
     tzdata \
     software-properties-common \
     zlib1g-dev \
+    zstd \
     && apt-get autoremove --purge -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
diff --git a/dev/spark-test-image/python-312-pandas-3/Dockerfile b/dev/spark-test-image/python-312-pandas-3/Dockerfile
index c54a8c284a6b9..e2a2c189df15b 100644
--- a/dev/spark-test-image/python-312-pandas-3/Dockerfile
+++ b/dev/spark-test-image/python-312-pandas-3/Dockerfile
@@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \
     tzdata \
     software-properties-common \
     zlib1g-dev \
+    zstd \
     && apt-get autoremove --purge -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 8eee9e414f1d8..72b40510b8db5 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -48,6 +48,7 @@ RUN apt-get update && apt-get install -y \
     pkg-config \
     tzdata \
     software-properties-common \
+    zstd \
     zlib1g-dev \
     && apt-get autoremove --purge -y \
     && apt-get clean \
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index 6cfdd2d5a86ed..c13e364f15897 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
+    zlib1g-dev \
+    zstd
 
 # Install Python 3.13
 RUN add-apt-repository ppa:deadsnakes/ppa
diff --git a/dev/spark-test-image/python-314-nogil/Dockerfile b/dev/spark-test-image/python-314-nogil/Dockerfile
index edfea31729928..6dea9c2fc35ed 100644
--- a/dev/spark-test-image/python-314-nogil/Dockerfile
+++ b/dev/spark-test-image/python-314-nogil/Dockerfile
@@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
+    zlib1g-dev \
+    zstd
 
 # Install Python 3.14 (no GIL)
 RUN add-apt-repository ppa:deadsnakes/ppa
diff --git a/dev/spark-test-image/python-314/Dockerfile b/dev/spark-test-image/python-314/Dockerfile
index b6bc76c6b93b4..2f3570ec06876 100644
--- a/dev/spark-test-image/python-314/Dockerfile
+++ b/dev/spark-test-image/python-314/Dockerfile
@@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
+    zlib1g-dev \
+    zstd
 
 # Install Python 3.14
 RUN add-apt-repository ppa:deadsnakes/ppa
diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile
index 72abd5f5811b9..d2e4a83ce81e1 100644
--- a/dev/spark-test-image/python-minimum/Dockerfile
+++ b/dev/spark-test-image/python-minimum/Dockerfile
@@ -47,7 +47,8 @@ RUN apt-get update && apt-get install -y \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
+    zlib1g-dev \
+    zstd
 
 # Install Python 3.10
 RUN add-apt-repository ppa:deadsnakes/ppa
diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile b/dev/spark-test-image/python-ps-minimum/Dockerfile
deleted file mode 100644
index 0f970202edd52..0000000000000
--- a/dev/spark-test-image/python-ps-minimum/Dockerfile
+++ /dev/null
@@ -1,70 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Image for building and testing Spark branches. Based on Ubuntu 24.04.
-# See also in https://hub.docker.com/_/ubuntu
-FROM ubuntu:noble
-LABEL org.opencontainers.image.authors="Apache Spark project <dev@spark.apache.org>"
-LABEL org.opencontainers.image.licenses="Apache-2.0"
-LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For Pandas API on Spark with old dependencies"
-# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
-LABEL org.opencontainers.image.version=""
-
-ENV FULL_REFRESH_DATE=20260210
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV DEBCONF_NONINTERACTIVE_SEEN=true
-
-RUN printf 'Types: deb\nURIs: https://mirrors.edge.kernel.org/ubuntu\nSuites: noble noble-updates noble-security\nComponents: main restricted universe multiverse\nSigned-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg\n' > /etc/apt/sources.list.d/mirror.sources
-
-# Should keep the installation consistent with https://apache.github.io/spark/api/python/getting_started/install.html
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    ca-certificates \
-    curl \
-    gfortran \
-    git \
-    gnupg \
-    libgit2-dev \
-    liblapack-dev \
-    libopenblas-dev \
-    libssl-dev \
-    openjdk-17-jdk-headless \
-    pkg-config \
-    tzdata \
-    software-properties-common \
-    zlib1g-dev
-
-# Install Python 3.10
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y \
-    python3.10 \
-    python3.10-venv \
-    && apt-get autoremove --purge -y \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# Setup virtual environment
-ENV VIRTUAL_ENV=/opt/spark-venv
-RUN python3.10 -m venv $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-ARG BASIC_PIP_PKGS="pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting psutil"
-ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf==6.33.5"
-
-RUN python3.10 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \
-    python3.10 -m pip cache purge
diff --git a/dev/spark-test-image/sparkr/Dockerfile b/dev/spark-test-image/sparkr/Dockerfile
index 07816add74fca..8a03095aef7ee 100644
--- a/dev/spark-test-image/sparkr/Dockerfile
+++ b/dev/spark-test-image/sparkr/Dockerfile
@@ -62,6 +62,7 @@ RUN apt-get update && apt-get install -y \
     software-properties-common \
     wget \
     zlib1g-dev \
+    zstd \
     && rm -rf /var/lib/apt/lists/*
 
 RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' >> /etc/apt/sources.list
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index bd7d1f55aaee5..c26529951cc78 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -449,7 +449,7 @@ def __hash__(self):
 
 pipelines = Module(
     name="pipelines",
-    dependencies=[],
+    dependencies=[sql],
     source_file_regexes=["sql/pipelines"],
     sbt_test_goals=[
         "pipelines/test",
@@ -611,6 +611,7 @@ def __hash__(self):
         "pyspark.sql.tests.test_readwriter",
         "pyspark.sql.tests.test_serde",
         "pyspark.sql.tests.test_session",
+        "pyspark.sql.tests.test_nearest_by_join",
         "pyspark.sql.tests.test_subquery",
         "pyspark.sql.tests.test_types",
         "pyspark.sql.tests.test_geographytype",
@@ -1173,6 +1174,7 @@ def __hash__(self):
         "pyspark.sql.tests.connect.test_parity_observation",
         "pyspark.sql.tests.connect.test_parity_repartition",
         "pyspark.sql.tests.connect.test_parity_stat",
+        "pyspark.sql.tests.connect.test_parity_nearest_by_join",
         "pyspark.sql.tests.connect.test_parity_subquery",
         "pyspark.sql.tests.connect.test_parity_types",
         "pyspark.sql.tests.connect.test_parity_column",
@@ -1187,7 +1189,6 @@ def __hash__(self):
         "pyspark.sql.tests.connect.test_parity_python_datasource",
         "pyspark.sql.tests.connect.test_parity_frame_plot",
         "pyspark.sql.tests.connect.test_parity_frame_plot_plotly",
-        "pyspark.sql.tests.connect.test_session",
         "pyspark.sql.tests.connect.test_utils",
         "pyspark.sql.tests.connect.client.test_artifact",
         "pyspark.sql.tests.connect.client.test_artifact_localcluster",
@@ -1195,7 +1196,7 @@ def __hash__(self):
         "pyspark.sql.tests.connect.client.test_client_call_stack_trace",
         "pyspark.sql.tests.connect.client.test_client_retries",
         "pyspark.sql.tests.connect.client.test_reattach",
-        "pyspark.sql.tests.connect.test_resources",
+        "pyspark.sql.tests.connect.test_parity_resources",
         "pyspark.sql.tests.connect.shell.test_progress",
         "pyspark.sql.tests.connect.test_df_debug",
         "pyspark.sql.tests.connect.arrow.test_parity_arrow",
@@ -1672,6 +1673,40 @@ def __hash__(self):
     test_tags=["org.apache.spark.tags.DockerTest"],
 )
 
+
+# dev_tools is a pseudo module that contains all the dev related files that
+# won't impact the CI build and tests (except for CI which is forced to
+# run anyway).
+# This module is created so modifying files in this module won't trigger any
+# tests to run.
+dev_tools = Module(
+    name="dev-tools",
+    dependencies=[],
+    source_file_regexes=[
+        ".*README.md",
+        ".*AGENTS.md",
+        r".*\.gitignore",
+        "CONTRIBUTING.md",
+        ".asf.yaml",
+        "SECURITY.md",
+        "NOTICE-binary",
+        "LICENSE-binary",
+        "ui-test/package.json",
+        "ui-test/package-lock.json",
+        "scalastyle-config.xml",
+        "dev/checkstyle.xml",
+        "dev/checkstyle-suppressions.xml",
+        "dev/spark-test-image/lint/Dockerfile",
+        "dev/lint-python",
+        "dev/lint-scala",
+        "dev/reformat-python",
+        "dev/structured_logging_style.py",
+        "dev/merge_spark_pr.py",
+        "dev/create_spark_jira.py",
+        "dev/create-release/",
+    ],
+)
+
 # The root module is a dummy module which is used to run all of the tests.
 # No other modules should directly depend on this module.
 root = Module(
diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py
index a66c0e70cb567..fff418bdb3489 100755
--- a/dev/sparktestsupport/utils.py
+++ b/dev/sparktestsupport/utils.py
@@ -33,26 +33,15 @@ def determine_modules_for_files(filenames):
     """
     Given a list of filenames, return the set of modules that contain those files.
     If a file is not associated with a more specific submodule, then this method will consider that
-    file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions,
-    and `README.md` is always ignored.
+    file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions.
 
     >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"]))
     ['pyspark-core', 'pyspark-errors', 'sql']
     >>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])]
     ['root']
-    >>> [x.name for x in determine_modules_for_files(["sql/README.md"])]
-    []
     """
     changed_modules = set()
     for filename in filenames:
-        if filename.endswith("README.md"):
-            continue
-        if filename in (
-            "scalastyle-config.xml",
-            "dev/checkstyle.xml",
-            "dev/checkstyle-suppressions.xml",
-        ):
-            continue
         if ("GITHUB_ACTIONS" not in os.environ) and filename.startswith(".github"):
             continue
         matched_at_least_one_module = False
@@ -115,8 +104,8 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
     >>> sorted([x.name for x in determine_modules_to_test([modules.sql])])
     ... # doctest: +NORMALIZE_WHITESPACE
     ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver',
-     'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect', 'pyspark-mllib',
-     'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow',
+     'mllib', 'pipelines', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect',
+     'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow',
      'pyspark-pandas-slow-connect', 'pyspark-pipelines', 'pyspark-sql',
      'pyspark-structured-streaming', 'pyspark-structured-streaming-connect',
      'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
@@ -124,8 +113,8 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
     ...     [modules.sparkr, modules.sql], deduplicated=False)])
     ... # doctest: +NORMALIZE_WHITESPACE
     ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver',
-     'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect', 'pyspark-mllib',
-     'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow',
+     'mllib', 'pipelines', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect',
+     'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow',
      'pyspark-pandas-slow-connect', 'pyspark-pipelines', 'pyspark-sql',
      'pyspark-structured-streaming', 'pyspark-structured-streaming-connect',
      'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
@@ -133,9 +122,9 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
     ...     [modules.sql, modules.core], deduplicated=False)])
     ... # doctest: +NORMALIZE_WHITESPACE
     ['avro', 'catalyst', 'connect', 'core', 'docker-integration-tests', 'examples', 'graphx',
-     'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'protobuf', 'pyspark-connect',
-     'pyspark-core', 'pyspark-install', 'pyspark-ml', 'pyspark-ml-connect', 'pyspark-mllib',
-     'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow',
+     'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'pipelines', 'protobuf',
+     'pyspark-connect', 'pyspark-core', 'pyspark-install', 'pyspark-ml', 'pyspark-ml-connect',
+     'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow',
      'pyspark-pandas-slow-connect', 'pyspark-pipelines', 'pyspark-resource', 'pyspark-sql',
      'pyspark-streaming', 'pyspark-structured-streaming', 'pyspark-structured-streaming-connect',
      'pyspark-testing', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming',
diff --git a/docs/_config.yml b/docs/_config.yml
index db0de55f3b519..5109d8d338a78 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -19,18 +19,16 @@ include:
 
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala.
-SPARK_VERSION: 4.2.0.1-4.3.0-0
-SPARK_VERSION_SHORT: 4.2.0.1-4.3.0-0
+SPARK_VERSION: 4.2.0.1-4.3.0-1
+SPARK_VERSION_SHORT: 4.2.0.1-4.3.0-1
 SCALA_BINARY_VERSION: "2.13"
 SCALA_VERSION: "2.13.18"
 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK
 SPARK_GITHUB_URL: https://github.com/apache/spark
-# Before a new release, we should:
-#   1. update the `version` array for the new Spark documentation
-#      on https://github.com/algolia/docsearch-configs/blob/master/configs/apache_spark.json.
-#   2. update the value of `facetFilters.version` in `algoliaOptions` on the new release branch.
-# Otherwise, after release, the search results are always based on the latest documentation
-# (https://spark.apache.org/docs/latest/) even when visiting the documentation of previous releases.
+# The DocSearch index is maintained by the Algolia crawler at https://crawler.algolia.com/.
+# The crawler indexes only https://spark.apache.org/docs/latest/ and tags every page with
+# `version:latest`. All release branches share this single index, so `facetFilters` stays
+# pinned to `version:latest` everywhere and no per-release update is required.
 DOCSEARCH_SCRIPT: |
   docsearch({
       apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
@@ -39,7 +37,7 @@ DOCSEARCH_SCRIPT: |
       inputSelector: '#docsearch-input',
       enhancedSearchInput: true,
       algoliaOptions: {
-        'facetFilters': ["version:4.2.0.1-4.3.0-0"]
+        'facetFilters': ["version:4.2.0.1-4.3.0-1"]
       },
       debug: false // Set debug to true if you want to inspect the dropdown
   });
diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb
index e6719c4bed7e3..429cef5aa026c 100644
--- a/docs/_plugins/build_api_docs.rb
+++ b/docs/_plugins/build_api_docs.rb
@@ -45,13 +45,11 @@ def build_spark_if_necessary
 
   print_header "Building Spark."
   cd(SPARK_PROJECT_ROOT)
-  # Maven may leave POM-only org.hamcrest:hamcrest-core trees under ~/.m2; SBT/Coursier then
-  # fails with "file:.../hamcrest-core-*.jar: not found". Clear before invoking SBT.
-  hamcrest_m2 = File.join(Dir.home, '.m2/repository/org/hamcrest/hamcrest-core')
-  FileUtils.rm_rf(hamcrest_m2)
   command = "NO_PROVIDED_SPARK_JARS=0 build/sbt -Phive -Pkinesis-asl clean package"
   puts "Running '#{command}'; this may take a few minutes..."
   system(command) || raise("Failed to build Spark")
+  # SPARK-53327: Use the modified ResourceImpl.class in spark-catalyst which is compatible with Java 25
+  system("zip -d assembly/target/scala-2.13/jars/datasketches-memory-3.0.2.jar org/apache/datasketches/memory/internal/ResourceImpl.class")
   $spark_package_is_built = true
 end
 
@@ -133,101 +131,147 @@ def build_spark_scala_and_java_docs_if_necessary
 
   command = "build/sbt -Pkinesis-asl unidoc"
   puts "Running '#{command}'..."
-  # Tee sbt output to a log file so we can diagnose failures. The most common
-  # unidoc failure is a javadoc crash mid-stream while generating HTML for a
-  # specific class, buried under ~100 benign errors on genjavadoc-generated
-  # Java stubs (e.g. target/java/org/apache/spark/ErrorInfo.java). Without the
-  # diagnostic below, the real culprit -- the source whose doc tripped javadoc
-  # -- is effectively invisible in the CI log.
-  log_file = File.join(SPARK_PROJECT_ROOT, "target", "unidoc-build.log")
-  mkdir_p(File.dirname(log_file))
-  success = stream_and_capture(command, log_file)
-  unless success
-    diagnose_unidoc_failure(log_file)
-    raise("Unidoc generation failed")
-  end
-end
 
-# Runs `command`, streaming every line to both stdout and `log_file`. Returns
-# true iff the command exited 0. Ruby-only; no shell pipefail reliance.
-def stream_and_capture(command, log_file)
-  File.open(log_file, 'w') do |f|
-    IO.popen("#{command} 2>&1", 'r') do |pipe|
-      pipe.each_line do |line|
+  # Two filter passes on the unidoc output, plus an additive fatal-error summary:
+  #
+  # 1. Genjavadoc-stub diagnostic blocks (~28 `[error]` lines on stubs under
+  #    `target/java/`, plus 3-5 continuation lines each). Inert because
+  #    `--ignore-source-errors` is set; matched by message text so legitimate
+  #    doclint diagnostics on stub paths still pass through.
+  #
+  # 2. `-verbose` progress lines (~13K total): `Loading source file ...`,
+  #    `[parsing started/completed ...]`, `[loading /path/X.class]`,
+  #    `Generating .../X.html`. These are dominant in the log when `-verbose`
+  #    is set (which it is in `JavaUnidoc / unidoc / javacOptions` to surface
+  #    per-file `error: reference not found` diagnostics) but carry no signal
+  #    of their own. Suppressing them brings the visible log from ~17K to ~5K
+  #    lines on a typical run while leaving every diagnostic untouched.
+  #
+  # 3. Fatal-error summary (additive, drops no log lines). The filtered log is
+  #    still ~4K lines and most `error:` text in it is non-fatal source-loading
+  #    chatter, so the build-failing diagnostics are hard to spot. After the
+  #    pipe closes, we print a `Fatal javadoc errors (N): ...` block and emit
+  #    `::error file=,line=::` GitHub Actions annotations so they surface in the
+  #    PR check panel. Captured strictly within the Standard Doclet phase
+  #    bracketed by `Building tree for all the packages and classes...` and
+  #    `Building index for all classes...`, which is where doclint diagnostics
+  #    are emitted -- this matches what javadoc counts toward exit code 1.
+  #    Self-checked against javadoc's own `N errors` summary line; a mismatch
+  #    emits a `::warning::` so future phase-marker drift is visible.
+  ansi = /\e\[[0-9;]*[A-Za-z]/
+  stub_header = %r{
+    \[(?:error|warn)\]\s+
+    \S*?/target/java/\S+\.java:\d+(?::\d+)?:\s+
+    error:\s+
+    (?:cannot\s+find\s+symbol
+     |illegal\s+combination\s+of\s+modifiers
+     |non-static\s+type\s+variable\b
+     |.*?\s+is\s+not\s+public\s+in\s+\S+;\s+cannot\s+be\s+accessed\s+from\s+outside\s+package)
+  }x
+  stub_cont = %r{\A\s*\[(?:error|warn)\]\s+(?!/\S+\.java:\d+(?::\d+)?:\s)}
+  verbose_line = %r{
+    \[(?:error|warn)\]\s+
+    (?:Loading\s+source\s+file\s
+     |\[parsing\s+(?:started|completed)\s
+     |\[loading\s
+     |\[checking\s
+     |\[wrote\s
+     |Generating\s+\S+\.html
+    )
+  }x
+
+  # Doclint phase tracking for the trailing summary. Standard Doclet bookends the
+  # phase that produces build-failing diagnostics with these marker lines; any
+  # `error:` outside this window is source-loading noise that does not contribute
+  # to javadoc's exit code. The summary below captures only the fatal ones and
+  # re-emits them as GitHub Actions annotations so they surface in the PR check
+  # panel instead of being buried in a 4K-line log.
+  doclint_start   = %r{\bBuilding\s+tree\s+for\s+all\s+the\s+packages\s+and\s+classes\b}
+  doclint_end     = %r{\bBuilding\s+index\s+for\s+all\s+classes\b}
+  doclint_diag    = %r{\A\[warn\]\s+(?<path>\S+):(?<lineno>\d+)(?::\d+)?:\s+error:\s+(?<msg>.+?)\s*\z}
+  doclint_cont    = %r{\A\[warn\]\s(?!\S+:\d+(?::\d+)?:\s+error:)(?<content>.*?)\s*\z}
+  doclint_summary = %r{\A\[warn\]\s+(?<count>[\d,]+)\s+errors?\s*\z}
+
+  in_stub = false
+  in_doclint = false
+  fatal_diagnostics = []
+  pending_context_lines = 0  # snippet + caret lines that follow each diag header
+  reported_error_count = nil
+
+  IO.popen("#{command} 2>&1", 'r') do |pipe|
+    pipe.each_line do |line|
+      plain = line.gsub(ansi, '')
+
+      if plain =~ doclint_start
+        in_doclint = true
+      elsif in_doclint && plain =~ doclint_end
+        in_doclint = false
+        pending_context_lines = 0
+      end
+
+      if in_doclint && (m = plain.match(doclint_diag))
+        fatal_diagnostics << {
+          path: m[:path], line: m[:lineno], msg: m[:msg], context: []
+        }
+        pending_context_lines = 2
+      elsif in_doclint && pending_context_lines > 0 &&
+            (m = plain.match(doclint_cont)) && !fatal_diagnostics.empty?
+        fatal_diagnostics.last[:context] << m[:content]
+        pending_context_lines -= 1
+      end
+
+      if reported_error_count.nil? && (m = plain.match(doclint_summary))
+        reported_error_count = m[:count].delete(',').to_i
+      end
+
+      if plain =~ verbose_line
+        in_stub = false
+        # suppress -verbose progress line
+      elsif plain =~ stub_header
+        in_stub = true
+      elsif in_stub && plain =~ stub_cont
+        # continuation of a stub block; suppress
+      else
+        in_stub = false
         $stdout.write(line)
         $stdout.flush
-        f.write(line)
       end
     end
   end
-  $?.success?
-end
 
-# Scans the captured unidoc log and prints a pointer to the most likely
-# culprit source file. The heuristic: when javadoc dies mid-HTML-generation,
-# the last "Generating .../X.html" line before "javadoc exited with exit code"
-# names the class that tripped it. Prints nothing actionable if the failure
-# mode doesn't match (e.g. a scaladoc error), in which case the full log above
-# already shows what's wrong.
-def diagnose_unidoc_failure(log_file)
-  return unless File.exist?(log_file)
-  begin
-    lines = File.readlines(log_file)
-
-    javadoc_exit_idx = lines.rindex { |l| l.include?("javadoc exited with exit code") }
-    last_generating = nil
-    if javadoc_exit_idx
-      # Strip ANSI color codes so the regex matches sbt-coloured output too.
-      ansi = /\e\[[0-9;]*[A-Za-z]/
-      lines[0...javadoc_exit_idx].reverse_each do |line|
-        if line.gsub(ansi, '') =~ %r{Generating .+/javaunidoc/(\S+?\.html)\.\.\.}
-          last_generating = $1
-          break
-        end
-      end
+  unless fatal_diagnostics.empty?
+    bar = "=" * 72
+    puts ""
+    puts bar
+    puts "Fatal javadoc errors (#{fatal_diagnostics.size}):"
+    puts bar
+    fatal_diagnostics.each_with_index do |d, i|
+      puts "  #{i + 1}. #{d[:path]}:#{d[:line]}: #{d[:msg]}"
+      d[:context].each { |c| puts "       #{c}" }
     end
-
-    banner = "=" * 78
-    $stderr.puts ""
-    $stderr.puts banner
-    $stderr.puts "Unidoc failed -- diagnostic summary"
-    $stderr.puts banner
-    if last_generating
-      class_path = last_generating.sub(/\.html$/, '')
-      class_name = class_path.tr('/', '.')
-      $stderr.puts ""
-      $stderr.puts "  Javadoc crashed while generating: #{last_generating}"
-      $stderr.puts "  Likely culprit: doc comment in #{class_name}"
-      $stderr.puts ""
-      $stderr.puts "  Javadoc can hard-exit (not just warn) on specific scaladoc"
-      $stderr.puts "  patterns once they have been passed through genjavadoc --"
-      $stderr.puts "  wiki-style `[[Class]]` / `[[method]]` links or inline-backticked"
-      $stderr.puts "  code refs in the Scala source for the class above are common"
-      $stderr.puts "  triggers. Start by auditing any recent doc-string changes in"
-      $stderr.puts "  that source file."
-      $stderr.puts ""
-      $stderr.puts "  NOTE: the '[error]' lines above on files under"
-      $stderr.puts "  target/java/... are benign genjavadoc stubs -- every PR"
-      $stderr.puts "  emits them and they do not cause the exit. Ignore them."
-    elsif javadoc_exit_idx
-      $stderr.puts ""
-      $stderr.puts "  Javadoc exited but no class HTML generation was in progress;"
-      $stderr.puts "  the crash predates HTML output -- likely a CLI / classpath /"
-      $stderr.puts "  setup issue. See the full sbt output above."
-    else
-      $stderr.puts ""
-      $stderr.puts "  Could not locate a 'javadoc exited with exit code' marker in"
-      $stderr.puts "  the log; the failure is likely outside the javaunidoc step"
-      $stderr.puts "  (scaladoc / sbt / build env). See the full sbt output above."
+    puts bar
+    puts ""
+
+    # GitHub Actions inline annotations. `%`, `\r`, `\n` require URL-style
+    # escaping per the workflow command spec; newlines render as multiple
+    # lines inside the annotation, so the source snippet and caret display
+    # under the error message in the PR check panel.
+    project_root = SPARK_PROJECT_ROOT + '/'
+    fatal_diagnostics.each do |d|
+      rel = d[:path].start_with?(project_root) ? d[:path][project_root.length..] : d[:path]
+      full = ([d[:msg]] + d[:context]).join("\n")
+      enc = full.gsub(/[%\r\n]/, '%' => '%25', "\r" => '%0D', "\n" => '%0A')
+      puts "::error file=#{rel},line=#{d[:line]},title=javadoc::#{enc}"
     end
-    $stderr.puts banner
-    $stderr.puts ""
-  rescue => e
-    # Never let the diagnostic helper itself obscure the underlying unidoc
-    # failure: if anything here goes wrong (e.g. encoding error reading the
-    # log), report it briefly and let the caller raise the real error.
-    $stderr.puts "(diagnostic helper failed: #{e.class}: #{e.message})"
   end
+
+  if reported_error_count && reported_error_count != fatal_diagnostics.size
+    puts "::warning::Javadoc reported #{reported_error_count} errors but " \
+         "build_api_docs.rb captured #{fatal_diagnostics.size}. The doclint " \
+         "phase markers may have shifted; please update build_api_docs.rb."
+  end
+
+  raise("Unidoc generation failed") unless $?.success?
 end
 
 def build_scala_and_java_docs
diff --git a/docs/building-spark.md b/docs/building-spark.md
index a2e3125be8d1d..f64304dd92fd3 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -27,7 +27,7 @@ license: |
 ## Apache Maven
 
 The Maven-based build is the build of reference for Apache Spark.
-Building Spark using Maven requires Maven 3.9.15 and Java 17/21.
+Building Spark using Maven requires Maven 3.9.15 and Java 17/21/25.
 Spark requires Scala 2.13; support for Scala 2.12 was removed in Spark 4.0.0.
 
 ### Setting up Maven's Memory Usage
diff --git a/docs/declarative-pipelines-programming-guide.md b/docs/declarative-pipelines-programming-guide.md
index c5d18a7cb71be..e1c2c078212ae 100644
--- a/docs/declarative-pipelines-programming-guide.md
+++ b/docs/declarative-pipelines-programming-guide.md
@@ -180,6 +180,33 @@ Your pipelines implemented with the Python API must import this module. It's rec
 from pyspark import pipelines as dp
 ```
 
+### The Spark Session in Python Pipelines
+
+In Spark 4.1, every pipeline file had to declare `spark = SparkSession.active()` explicitly. Starting in Spark 4.2, the framework injects spark into each pipeline file's module namespace, so the explicit assignment is no longer required. 
+
+```python
+from pyspark import pipelines as dp
+
+@dp.materialized_view
+def my_view():
+    return spark.range(10)
+```
+
+Pipeline files that still include `spark = SparkSession.active()` continue to work correctly. However, if you do assign the session explicitly, `SparkSession.active()` is the only supported way to do so. For example, `SparkSession.builder.config(...).getOrCreate()` mutates session config, which is blocked in SDP.
+
+Note that without the explicit assignment, many tools and editors may consider `spark` and undefined name. To address that, you can add `spark: SparkSession` at module scope. SDP will still inject the actual session before the module runs, so this only documents the type for static analysis. 
+
+```python
+from pyspark import pipelines as dp
+from pyspark.sql import SparkSession
+
+spark: SparkSession
+
+@dp.materialized_view
+def my_view():
+    return spark.range(10)
+```
+
 ### Creating a Materialized View in Python
 
 The `@dp.materialized_view` decorator tells SDP to create a materialized view based on the results of a function that performs a batch read:
diff --git a/docs/img/AllJobsPage.png b/docs/img/AllJobsPage.png
new file mode 100644
index 0000000000000..19e1acc8cecf4
Binary files /dev/null and b/docs/img/AllJobsPage.png differ
diff --git a/docs/img/AllJobsPageDetail1.png b/docs/img/AllJobsPageDetail1.png
deleted file mode 100644
index de7e8c8883328..0000000000000
Binary files a/docs/img/AllJobsPageDetail1.png and /dev/null differ
diff --git a/docs/img/AllJobsPageDetail2.png b/docs/img/AllJobsPageDetail2.png
deleted file mode 100644
index b7203b2e66586..0000000000000
Binary files a/docs/img/AllJobsPageDetail2.png and /dev/null differ
diff --git a/docs/img/AllJobsPageDetail3.png b/docs/img/AllJobsPageDetail3.png
deleted file mode 100644
index 75b7caec119b2..0000000000000
Binary files a/docs/img/AllJobsPageDetail3.png and /dev/null differ
diff --git a/docs/img/AllStagesPage.png b/docs/img/AllStagesPage.png
new file mode 100644
index 0000000000000..52b2882f60abb
Binary files /dev/null and b/docs/img/AllStagesPage.png differ
diff --git a/docs/img/AllStagesPageDetail1.png b/docs/img/AllStagesPageDetail1.png
deleted file mode 100644
index ac3c48b5a9a16..0000000000000
Binary files a/docs/img/AllStagesPageDetail1.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail2.png b/docs/img/AllStagesPageDetail2.png
deleted file mode 100644
index 41d4165b92988..0000000000000
Binary files a/docs/img/AllStagesPageDetail2.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail3.png b/docs/img/AllStagesPageDetail3.png
deleted file mode 100644
index fd5267aa4a1c4..0000000000000
Binary files a/docs/img/AllStagesPageDetail3.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail4.png b/docs/img/AllStagesPageDetail4.png
deleted file mode 100644
index 2f038b3d6196b..0000000000000
Binary files a/docs/img/AllStagesPageDetail4.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail5.png b/docs/img/AllStagesPageDetail5.png
deleted file mode 100644
index 95d1f0e7f3bea..0000000000000
Binary files a/docs/img/AllStagesPageDetail5.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail6.png b/docs/img/AllStagesPageDetail6.png
deleted file mode 100644
index 1c4ec1594e031..0000000000000
Binary files a/docs/img/AllStagesPageDetail6.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail7.png b/docs/img/AllStagesPageDetail7.png
deleted file mode 100644
index 6ab37481aa157..0000000000000
Binary files a/docs/img/AllStagesPageDetail7.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail8.png b/docs/img/AllStagesPageDetail8.png
deleted file mode 100644
index a60745c27b166..0000000000000
Binary files a/docs/img/AllStagesPageDetail8.png and /dev/null differ
diff --git a/docs/img/AllStagesPageDetail9.png b/docs/img/AllStagesPageDetail9.png
deleted file mode 100644
index c471320cd9bbc..0000000000000
Binary files a/docs/img/AllStagesPageDetail9.png and /dev/null differ
diff --git a/docs/img/JobPage.png b/docs/img/JobPage.png
new file mode 100644
index 0000000000000..594bdcd30d35e
Binary files /dev/null and b/docs/img/JobPage.png differ
diff --git a/docs/img/JobPageDetail1.png b/docs/img/JobPageDetail1.png
deleted file mode 100644
index 1ee741d1f09d0..0000000000000
Binary files a/docs/img/JobPageDetail1.png and /dev/null differ
diff --git a/docs/img/JobPageDetail2.png b/docs/img/JobPageDetail2.png
deleted file mode 100644
index 5eb529eb7c275..0000000000000
Binary files a/docs/img/JobPageDetail2.png and /dev/null differ
diff --git a/docs/img/JobPageDetail3.png b/docs/img/JobPageDetail3.png
deleted file mode 100644
index 9f691e4ed2b6b..0000000000000
Binary files a/docs/img/JobPageDetail3.png and /dev/null differ
diff --git a/docs/img/StagePage.png b/docs/img/StagePage.png
new file mode 100644
index 0000000000000..9cbabd9eacf82
Binary files /dev/null and b/docs/img/StagePage.png differ
diff --git a/docs/img/webui-env-class.png b/docs/img/webui-env-class.png
deleted file mode 100644
index e57dada528d1e..0000000000000
Binary files a/docs/img/webui-env-class.png and /dev/null differ
diff --git a/docs/img/webui-env-hadoop.png b/docs/img/webui-env-hadoop.png
deleted file mode 100644
index e4ae232d18217..0000000000000
Binary files a/docs/img/webui-env-hadoop.png and /dev/null differ
diff --git a/docs/img/webui-env-sys.png b/docs/img/webui-env-sys.png
deleted file mode 100644
index e7d57fe1a84db..0000000000000
Binary files a/docs/img/webui-env-sys.png and /dev/null differ
diff --git a/docs/img/webui-env-tab.png b/docs/img/webui-env-tab.png
index d9bfc1d4adad0..2ef145ca3bf89 100644
Binary files a/docs/img/webui-env-tab.png and b/docs/img/webui-env-tab.png differ
diff --git a/docs/img/webui-exe-err.png b/docs/img/webui-exe-err.png
deleted file mode 100644
index 2fb11638faf74..0000000000000
Binary files a/docs/img/webui-exe-err.png and /dev/null differ
diff --git a/docs/img/webui-exe-tab.png b/docs/img/webui-exe-tab.png
index 8b835fd1f9740..287ba238677db 100644
Binary files a/docs/img/webui-exe-tab.png and b/docs/img/webui-exe-tab.png differ
diff --git a/docs/img/webui-exe-thread.png b/docs/img/webui-exe-thread.png
deleted file mode 100644
index 136d274159e16..0000000000000
Binary files a/docs/img/webui-exe-thread.png and /dev/null differ
diff --git a/docs/img/webui-sql-dag.png b/docs/img/webui-sql-dag.png
index 1c83c176da325..e20630bdddf52 100644
Binary files a/docs/img/webui-sql-dag.png and b/docs/img/webui-sql-dag.png differ
diff --git a/docs/img/webui-sql-plan.png b/docs/img/webui-sql-plan.png
deleted file mode 100644
index f88e0b24a5411..0000000000000
Binary files a/docs/img/webui-sql-plan.png and /dev/null differ
diff --git a/docs/img/webui-sql-tab.png b/docs/img/webui-sql-tab.png
index dca58e7d93a31..1d9660cc3df45 100644
Binary files a/docs/img/webui-sql-tab.png and b/docs/img/webui-sql-tab.png differ
diff --git a/docs/img/webui-storage-detail.png b/docs/img/webui-storage-detail.png
index 837b235be011c..9cb448e1ca463 100644
Binary files a/docs/img/webui-storage-detail.png and b/docs/img/webui-storage-detail.png differ
diff --git a/docs/img/webui-storage-tab.png b/docs/img/webui-storage-tab.png
index 3a832981cb93e..1f000bef95e2c 100644
Binary files a/docs/img/webui-storage-tab.png and b/docs/img/webui-storage-tab.png differ
diff --git a/docs/index.md b/docs/index.md
index cb32ddcde7e2b..6d590172e9380 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -34,7 +34,8 @@ source, visit [Building Spark](building-spark.html).
 
 Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS), and it should run on any platform that runs a supported version of Java. This should include JVMs on x86_64 and ARM64. It's easy to run locally on one machine --- all you need is to have `java` installed on your system `PATH`, or the `JAVA_HOME` environment variable pointing to a Java installation.
 
-Spark runs on Java 17/21, Scala 2.13, Python 3.10+, and R 3.5+ (Deprecated).
+Spark runs on Java 17/21/25, Scala 2.13, Python 3.10+, and R 3.5+ (Deprecated).
+Java 25 prior to version 25.0.3 support is deprecated as of Spark 4.2.0.
 When using the Scala API, it is necessary for applications to use the same version of Scala that Spark was compiled for. Since Spark 4.0.0, it's Scala 2.13.
 
 # Running the Examples and Shell
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index 777d8d4228e40..aa753e259bcc7 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -708,6 +708,18 @@ See the [configuration page](configuration.html) for information on Spark config
   </td>
   <td>4.1.0</td>
 </tr>
+<tr>
+  <td><code>spark.kubernetes.allocation.recoveryMode.enabled</code></td>
+  <td><code>(none)</code></td>
+  <td>
+    When Spark driver detects an executor termination due to OOM, Spark starts to
+    allocate the recovery-mode executors which accept only a single task per executor JVM.
+    In other words, the recovery-mode executors replace the OOM-terminated executors to
+    survive from the resource-hungry tasks for the remaining tasks and stages.
+    If set to <code>false</code>, Spark will not use the recovery-mode executors.
+  </td>
+  <td>4.2.0</td>
+</tr>
 <tr>
   <td><code>spark.kubernetes.jars.avoidDownloadSchemes</code></td>
   <td><code>(none)</code></td>
@@ -1545,6 +1557,14 @@ See the [configuration page](configuration.html) for information on Spark config
   </td>
   <td>3.2.0</td>
 </tr>
+<tr>
+  <td><code>spark.kubernetes.driver.annotateExitException</code></td>
+  <td><code>false</code></td>
+  <td>
+    If set to true, Spark will store the exit exception failed applications in the Kubernetes API server using the <code>spark.exit-exception</code> annotation.
+  </td>
+  <td>4.1.0</td>
+</tr>
 <tr>
   <td><code>spark.kubernetes.driver.service.ipFamilyPolicy</code></td>
   <td><code>SingleStack</code></td>
@@ -1563,6 +1583,14 @@ See the [configuration page](configuration.html) for information on Spark config
   </td>
   <td>3.4.0</td>
 </tr>
+<tr>
+  <td><code>spark.kubernetes.executor.useDriverPodIP</code></td>
+  <td><code>false</code></td>
+  <td>
+    If true, executor pods use Driver pod IP directly instead of Driver Service.
+  </td>
+  <td>4.1.0</td>
+</tr>
 <tr>
   <td><code>spark.kubernetes.driver.ownPersistentVolumeClaim</code></td>
   <td><code>true</code></td>
@@ -1660,6 +1688,17 @@ See the [configuration page](configuration.html) for information on Spark config
   </td>
   <td>3.2.0</td>
 </tr>
+<tr>
+  <td><code>spark.kubernetes.allocation.maxPendingPodsPerRp</code></td>
+  <td><code>Int.MaxValue</code></td>
+  <td>
+    Maximum number of pending PODs allowed per resource profile ID during executor
+    allocation. This provides finer-grained control over pending pods by limiting them
+    per resource profile rather than globally. When set, this limit is enforced
+    independently for each resource profile ID.
+  </td>
+  <td>4.1.0</td>
+</tr>
 <tr>
   <td><code>spark.kubernetes.allocation.pods.allocator</code></td>
   <td><code>direct</code></td>
@@ -1747,6 +1786,67 @@ See the [configuration page](configuration.html) for information on Spark config
   </td>
   <td>3.3.0</td>
 </tr>
+<tr>
+  <td><code>spark.kubernetes.executor.resizeInterval</code></td>
+  <td><code>0s</code></td>
+  <td>
+    Interval between executor resize operations. To disable, set 0 (default).
+    Takes effect only when <code>org.apache.spark.scheduler.cluster.k8s.ExecutorResizePlugin</code>
+    is registered via <code>spark.plugins</code>.
+  </td>
+  <td>4.2.0</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.resizeThreshold</code></td>
+  <td><code>0.9</code></td>
+  <td>
+    The threshold to resize.
+    Takes effect only when <code>org.apache.spark.scheduler.cluster.k8s.ExecutorResizePlugin</code>
+    is registered via <code>spark.plugins</code>.
+  </td>
+  <td>4.2.0</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.resizeFactor</code></td>
+  <td><code>0.1</code></td>
+  <td>
+    The factor to resize.
+    Takes effect only when <code>org.apache.spark.scheduler.cluster.k8s.ExecutorResizePlugin</code>
+    is registered via <code>spark.plugins</code>.
+  </td>
+  <td>4.2.0</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.pvc.resizeInterval</code></td>
+  <td><code>5min</code></td>
+  <td>
+    Interval between executor PVC resize operations, in minutes. Defaults to 5 minutes.
+    Set to 0 to disable. Must be 0 or a positive multiple of 5 minutes.
+    Takes effect only when <code>org.apache.spark.scheduler.cluster.k8s.ExecutorPVCResizePlugin</code>
+    is registered via <code>spark.plugins</code>.
+  </td>
+  <td>4.2.0</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.pvc.resizeThreshold</code></td>
+  <td><code>0.5</code></td>
+  <td>
+    The PVC usage ratio (used / capacity) above which the driver triggers a resize.
+    Takes effect only when <code>org.apache.spark.scheduler.cluster.k8s.ExecutorPVCResizePlugin</code>
+    is registered via <code>spark.plugins</code>.
+  </td>
+  <td>4.2.0</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.pvc.resizeFactor</code></td>
+  <td><code>1.0</code></td>
+  <td>
+    The factor to grow PVC storage by, relative to the current request.
+    Takes effect only when <code>org.apache.spark.scheduler.cluster.k8s.ExecutorPVCResizePlugin</code>
+    is registered via <code>spark.plugins</code>.
+  </td>
+  <td>4.2.0</td>
+</tr>
 </table>
 
 #### Pod template properties
@@ -1953,10 +2053,10 @@ Spark allows users to specify a custom Kubernetes schedulers.
 #### Using Volcano as Customized Scheduler for Spark on Kubernetes
 
 ##### Prerequisites
-* Spark on Kubernetes with [Volcano](https://volcano.sh/en) as a custom scheduler is supported since Spark v3.3.0 and Volcano v1.7.0. Below is an example to install Volcano 1.14.1:
+* Spark on Kubernetes with [Volcano](https://volcano.sh/en) as a custom scheduler is supported since Spark v3.3.0 and Volcano v1.7.0. Below is an example to install Volcano 1.14.2:
 
   ```bash
-  kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.1/installer/volcano-development.yaml
+  kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.2/installer/volcano-development.yaml
   ```
 
 ##### Build
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index dca3b03eeb4e7..620e3800ff010 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -27,6 +27,9 @@ license: |
 - Since Spark 4.2, Spark enables order-independent checksums for shuffle outputs by default to detect data inconsistencies during indeterminate shuffle stage retries. If a checksum mismatch is detected, Spark rolls back and re-executes all succeeding stages that depend on the shuffle output. If rolling back is not possible for some succeeding stages, the job will fail. To restore the previous behavior, set `spark.sql.shuffle.orderIndependentChecksum.enabled` and `spark.sql.shuffle.orderIndependentChecksum.enableFullRetryOnMismatch` to `false`.
 - Since Spark 4.2, support for Derby JDBC datasource is deprecated.
 - Since Spark 4.2, a new default method `mergeWith` has been added to the `CustomTaskMetric` interface. The default implementation sums the two metric values, which is correct for count-type metrics. Data source connector implementations that report non-additive metrics (e.g., maximum, average, compression ratio, or gauge values) must override `mergeWith` to provide correct merge semantics.
+- Since Spark 4.2, the virtual `system` catalog hosts the new `system.builtin` and `system.session` namespaces. `system.builtin` exposes built-in functions and functions injected through `SparkSessionExtensions`; `system.session` exposes temporary views, temporary functions, and session variables created in the current session. As a result, 2-part references like `builtin.func()` and `session.func()` now follow a mini-path that tries the system namespace first and the current catalog second, so a persistent schema named `builtin` or `session` is no longer reached by `builtin.func()` / `session.func()` when the system namespace contains an object of the same name. To restore the previous behavior (current catalog first), set `spark.sql.legacy.persistentCatalogFirst` to `true`. Persistent schemas with these names are still allowed but should be reached with an explicit catalog prefix (for example, `spark_catalog.session.x`). See [Reserved system names](sql-ref-identifier.html#reserved-system-names).
+- Since Spark 4.2, `CREATE TEMPORARY VIEW`, `CREATE TEMPORARY FUNCTION`, and the corresponding `DROP` statements accept the `session` and `system.session` qualifiers on the object name (in addition to the previously supported unqualified form); for example, `CREATE TEMPORARY VIEW system.session.v AS ...` and `DROP TEMPORARY FUNCTION session.f` are now valid. Any other qualifier on a temporary object is rejected with `INVALID_TEMP_OBJ_QUALIFIER`.
+- Since Spark 4.2, the SQL standard `PATH` feature is available: the `SET PATH` statement, the `current_path()` function, path-based resolution of unqualified routines, tables, views, and session variables, and the configurations `spark.sql.path.enabled` (default `false`) and `spark.sql.defaultPath`. The feature is opt-in; when `spark.sql.path.enabled` is `false`, unqualified resolution falls back to a fixed default path and `SET PATH` is rejected with `UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED`. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) and [Name Resolution](sql-ref-name-resolution.html).
 
 ## Upgrading from Spark SQL 4.0 to 4.1
 
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 8621eca79a6c8..4f21b7b4b3c79 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -420,6 +420,7 @@ Below is a list of all the keywords in Spark SQL.
 |ANTI|non-reserved|strict-non-reserved|non-reserved|
 |ANY|reserved|non-reserved|reserved|
 |ANY_VALUE|non-reserved|non-reserved|non-reserved|
+|APPROX|non-reserved|non-reserved|non-reserved|
 |ARCHIVE|non-reserved|non-reserved|non-reserved|
 |ARRAY|non-reserved|non-reserved|reserved|
 |AS|reserved|non-reserved|reserved|
@@ -429,6 +430,7 @@ Below is a list of all the keywords in Spark SQL.
 |ATOMIC|non-reserved|non-reserved|non-reserved|
 |AUTHORIZATION|reserved|non-reserved|reserved|
 |BEGIN|non-reserved|non-reserved|non-reserved|
+|BERNOULLI|non-reserved|non-reserved|non-reserved|
 |BETWEEN|non-reserved|non-reserved|reserved|
 |BIGINT|non-reserved|non-reserved|reserved|
 |BINARY|non-reserved|non-reserved|reserved|
@@ -515,6 +517,7 @@ Below is a list of all the keywords in Spark SQL.
 |DFS|non-reserved|non-reserved|non-reserved|
 |DIRECTORIES|non-reserved|non-reserved|non-reserved|
 |DIRECTORY|non-reserved|non-reserved|non-reserved|
+|DISTANCE|non-reserved|non-reserved|non-reserved|
 |DISTINCT|reserved|non-reserved|reserved|
 |DISTRIBUTE|non-reserved|non-reserved|non-reserved|
 |DIV|non-reserved|non-reserved|not a keyword|
@@ -528,6 +531,7 @@ Below is a list of all the keywords in Spark SQL.
 |ESCAPE|reserved|non-reserved|reserved|
 |ESCAPED|non-reserved|non-reserved|non-reserved|
 |EVOLUTION|non-reserved|non-reserved|non-reserved|
+|EXACT|non-reserved|non-reserved|non-reserved|
 |EXCEPT|reserved|strict-non-reserved|reserved|
 |EXCHANGE|non-reserved|non-reserved|non-reserved|
 |EXCLUDE|non-reserved|non-reserved|non-reserved|
@@ -648,6 +652,7 @@ Below is a list of all the keywords in Spark SQL.
 |NANOSECOND|non-reserved|non-reserved|non-reserved|
 |NANOSECONDS|non-reserved|non-reserved|non-reserved|
 |NATURAL|reserved|strict-non-reserved|reserved|
+|NEAREST|non-reserved|non-reserved|non-reserved|
 |NEXT|non-reserved|non-reserved|non-reserved|
 |NO|non-reserved|non-reserved|reserved|
 |NONE|non-reserved|non-reserved|reserved|
@@ -738,6 +743,7 @@ Below is a list of all the keywords in Spark SQL.
 |SETS|non-reserved|non-reserved|non-reserved|
 |SHORT|non-reserved|non-reserved|non-reserved|
 |SHOW|non-reserved|non-reserved|non-reserved|
+|SIMILARITY|non-reserved|non-reserved|non-reserved|
 |SINGLE|non-reserved|non-reserved|non-reserved|
 |SKEWED|non-reserved|non-reserved|non-reserved|
 |SMALLINT|non-reserved|non-reserved|reserved|
@@ -760,6 +766,7 @@ Below is a list of all the keywords in Spark SQL.
 |SUBSTR|non-reserved|non-reserved|non-reserved|
 |SUBSTRING|non-reserved|non-reserved|non-reserved|
 |SYNC|non-reserved|non-reserved|non-reserved|
+|SYSTEM|non-reserved|non-reserved|reserved|
 |SYSTEM_PATH|non-reserved|non-reserved|not a keyword|
 |SYSTEM_TIME|non-reserved|non-reserved|non-reserved|
 |SYSTEM_VERSION|non-reserved|non-reserved|non-reserved|
diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md
index 743ad4e3abb22..0ae05d8f46bef 100644
--- a/docs/sql-ref-datatypes.md
+++ b/docs/sql-ref-datatypes.md
@@ -95,8 +95,8 @@ Spark SQL and DataFrames support the following data types:
 
 * Spatial types
   Spatial objects as defined in the [OGC Simple Feature Access](https://portal.ogc.org/files/?artifact_id=25355) specification.
-  - `GeometryType`: Represents GEOMETRY values—spatial objects in a Cartesian coordinate system. The type can be fixed to a single SRID, e.g. `geometry(4326)`, or allow mixed SRIDs with `geometry(any)`. Default SRID when not specified is 4326 (WGS 84).
-  - `GeographyType`: Represents GEOGRAPHY values—spatial objects in a geographic coordinate system (latitude/longitude). Edge interpolation is always SPHERICAL. The type can be fixed to a single SRID, e.g. `geography(4326)`, or allow mixed SRIDs with `geography(any)`. Default SRID is 4326 (WGS 84).
+  - `GeometryType`: Represents GEOMETRY values, spatial objects in a Cartesian coordinate system. The type can be fixed to a single SRID, e.g. `geometry(4326)`, or allow mixed SRIDs with `geometry(any)`. In SQL, `GEOMETRY` columns must always be declared with an explicit SRID or `ANY`.
+  - `GeographyType`: Represents GEOGRAPHY values, spatial objects in a geographic coordinate system (latitude/longitude). Edge interpolation is always SPHERICAL. The type can be fixed to a single geographic SRID, e.g. `geography(4326)`, or allow mixed SRIDs with `geography(any)`. In SQL, `GEOGRAPHY` columns must always be declared with an explicit SRID or `ANY`.
   For more details and built-in functions, see [Geospatial (Geometry/Geography) types](sql-ref-geospatial-types.html).
 
 * Complex types
@@ -143,8 +143,8 @@ from pyspark.sql.types import *
 |**TimestampNTZType**|datetime.datetime|TimestampNTZType()|
 |**DateType**|datetime.date|DateType()|
 |**DayTimeIntervalType**|datetime.timedelta|DayTimeIntervalType()|
-|**GeometryType**|Geometry|GeometryType() or GeometryType(*srid*)|
-|**GeographyType**|Geography|GeographyType() or GeographyType(*srid*)|
+|**GeometryType**|Geometry|GeometryType(*srid*)<br/>**Note:** *srid* is required and may be an `int` or the string `"ANY"`.|
+|**GeographyType**|Geography|GeographyType(*srid*)<br/>**Note:** *srid* is required and may be an `int` or the string `"ANY"`.|
 |**ArrayType**|list, tuple, or array|ArrayType(*elementType*, [*containsNull*])<br/>**Note:**The default value of *containsNull* is True.|
 |**MapType**|dict|MapType(*keyType*, *valueType*, [*valueContainsNull]*)<br/>**Note:**The default value of *valueContainsNull* is True.|
 |**StructType**|list or tuple|StructType(*fields*)<br/>**Note:** *fields* is a Seq of StructFields. Also, two fields with the same name are not allowed.|
@@ -179,8 +179,8 @@ You can access them by doing
 |**TimeType**|java.time.LocalTime|TimeType|
 |**YearMonthIntervalType**|java.time.Period|YearMonthIntervalType|
 |**DayTimeIntervalType**|java.time.Duration|DayTimeIntervalType|
-|**GeometryType**|org.apache.spark.sql.types.Geometry|GeometryType or GeometryType(*srid*)|
-|**GeographyType**|org.apache.spark.sql.types.Geography|GeographyType or GeographyType(*srid*)|
+|**GeometryType**|org.apache.spark.sql.types.Geometry|GeometryType(*srid*)|
+|**GeographyType**|org.apache.spark.sql.types.Geography|GeographyType(*srid*)|
 |**ArrayType**|scala.collection.Seq|ArrayType(*elementType*, [*containsNull]*)<br/>**Note:** The default value of *containsNull* is true.|
 |**MapType**|scala.collection.Map|MapType(*keyType*, *valueType*, [*valueContainsNull]*)<br/>**Note:** The default value of *valueContainsNull* is true.|
 |**StructType**|org.apache.spark.sql.Row|StructType(*fields*)<br/>**Note:** *fields* is a Seq of StructFields. Also, two fields with the same name are not allowed.|
@@ -272,8 +272,8 @@ The following table shows the type names as well as aliases used in Spark SQL pa
 |**DecimalType**|DECIMAL, DEC, NUMERIC|
 |**YearMonthIntervalType**|INTERVAL YEAR, INTERVAL YEAR TO MONTH, INTERVAL MONTH|
 |**DayTimeIntervalType**|INTERVAL DAY, INTERVAL DAY TO HOUR, INTERVAL DAY TO MINUTE, INTERVAL DAY TO SECOND, INTERVAL HOUR, INTERVAL HOUR TO MINUTE, INTERVAL HOUR TO SECOND, INTERVAL MINUTE, INTERVAL MINUTE TO SECOND, INTERVAL SECOND|
-|**GeometryType**|GEOMETRY or GEOMETRY(*srid*) or GEOMETRY(ANY)|
-|**GeographyType**|GEOGRAPHY or GEOGRAPHY(*srid*) or GEOGRAPHY(ANY)|
+|**GeometryType**|GEOMETRY(*srid*) or GEOMETRY(ANY)|
+|**GeographyType**|GEOGRAPHY(*srid*) or GEOGRAPHY(ANY)|
 |**ArrayType**|ARRAY\<element_type>|
 |**StructType**|STRUCT<field1_name: field1_type, field2_name: field2_type, ...><br/> **Note:** ':' is optional.|
 |**MapType**|MAP<key_type, value_type>|
diff --git a/docs/sql-ref-function-current-path.md b/docs/sql-ref-function-current-path.md
new file mode 100644
index 0000000000000..afe0d4f6ba54d
--- /dev/null
+++ b/docs/sql-ref-function-current-path.md
@@ -0,0 +1,85 @@
+---
+layout: global
+title: current_path function
+displayTitle: current_path function
+license: |
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+---
+
+Returns the effective SQL Path for the current session as a comma-separated string of
+qualified namespace names. See [`SET PATH`](sql-ref-syntax-aux-conf-mgmt-set-path.html) for a
+description of what the path is, how to enable it, and how to change it, and
+[Name Resolution](sql-ref-name-resolution.html) for how the path drives unqualified name
+resolution.
+
+### Syntax
+
+```sql
+current_path()
+```
+
+### Arguments
+
+This function takes no arguments. The parentheses may be omitted.
+
+### Returns
+
+A non-nullable `STRING`. Each path entry is written as a dotted name with backticks added only
+where required by Spark's identifier rules. Entries are separated by a single comma.
+
+When the path contains the virtual `CURRENT_SCHEMA` marker, the marker is materialized as the
+catalog-qualified current schema (`current_catalog.current_schema`) each time
+`current_path()` is evaluated, so subsequent `USE SCHEMA` statements are reflected without
+re-issuing `SET PATH`.
+
+### Examples
+
+```sql
+> SELECT current_path();
+ system.builtin,system.session,spark_catalog.default
+
+-- ANSI no-parens form returns the same value.
+> SELECT CURRENT_PATH;
+ system.builtin,system.session,spark_catalog.default
+
+-- The output reflects the latest SET PATH.
+> SET PATH = spark_catalog.default, system.builtin;
+> SELECT current_path();
+ spark_catalog.default,system.builtin
+
+-- CURRENT_SCHEMA on the path is re-evaluated on every call.
+> SET PATH = CURRENT_SCHEMA, system.builtin;
+> USE spark_catalog.finance;
+> SELECT current_path();
+ spark_catalog.finance,system.builtin
+> USE spark_catalog.default;
+> SELECT current_path();
+ spark_catalog.default,system.builtin
+
+-- Inside a persistent view or SQL function body, current_path() returns the invoker's path,
+-- not the frozen path captured at creation time.
+> SET PATH = spark_catalog.default, system.builtin;
+> CREATE VIEW v_path AS SELECT current_path() AS p;
+> SET PATH = spark_catalog.other, system.builtin;
+> SELECT * FROM v_path;
+ spark_catalog.other,system.builtin
+```
+
+### Related Statements
+
+* [Name Resolution](sql-ref-name-resolution.html)
+* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html)
+* [Built-in Functions](sql-ref-functions-builtin.html)
diff --git a/docs/sql-ref-functions-builtin.md b/docs/sql-ref-functions-builtin.md
index b6572609a34b8..22e52d0500c53 100644
--- a/docs/sql-ref-functions-builtin.md
+++ b/docs/sql-ref-functions-builtin.md
@@ -17,6 +17,10 @@ license: |
   limitations under the License.
 ---
 
+All built-in functions live in the virtual schema `system.builtin`. They can always be referenced
+unambiguously by their fully qualified name (for example `system.builtin.abs`), regardless of any
+user-defined function that may share the same name.
+
 ### Aggregate Functions
 {% include_api_gen generated-agg-funcs-table.html %}
 #### Examples
@@ -126,3 +130,8 @@ license: |
 {% include_api_gen generated-variant-funcs-table.html %}
 #### Examples
 {% include_api_gen generated-variant-funcs-examples.html %}
+
+### Geospatial ST Functions
+{% include_api_gen generated-st-funcs-table.html %}
+#### Examples
+{% include_api_gen generated-st-funcs-examples.html %}
diff --git a/docs/sql-ref-geospatial-types.md b/docs/sql-ref-geospatial-types.md
index d0fb1c6ed9465..d5a9d0fece84b 100644
--- a/docs/sql-ref-geospatial-types.md
+++ b/docs/sql-ref-geospatial-types.md
@@ -25,8 +25,13 @@ Spark SQL supports **GEOMETRY** and **GEOGRAPHY** types for spatial data, as def
 
 | Type | Coordinate system | Typical use and notes |
 |------|-------------------|------------------------|
-| **GEOMETRY** | Cartesian (planar) | Projected or local coordinates; planar calculations. Represents points, lines, polygons in a flat coordinate system. Suitable for Web Mercator (SRID 3857), UTM, or local grids (e.g. engineering/CAD). Default SRID in Spark is 4326. |
-| **GEOGRAPHY** | Geographic (latitude/longitude) | Earth-based data; distances and areas on the sphere/ellipsoid. Coordinates in longitude and latitude (degrees). Edge interpolation is always **SPHERICAL**. Default SRID is 4326 (WGS 84). |
+| **GEOMETRY** | Cartesian (planar) | Projected or local coordinates; planar calculations. Represents points, lines, polygons in a flat coordinate system. Suitable for Web Mercator (SRID 3857), UTM, or local grids (e.g. engineering/CAD). Accepts any SRID in the registry, including SRID 0 (unspecified CRS). |
+| **GEOGRAPHY** | Geographic (latitude/longitude) | Earth-based data; distances and areas on the sphere/ellipsoid. Coordinates in longitude and latitude (degrees). Edge interpolation is always **SPHERICAL**. Only geographic SRIDs are accepted; the most common is 4326 (WGS 84). |
+
+In SQL, `GEOMETRY` and `GEOGRAPHY` columns must always be declared with an explicit SRID
+(or `ANY`); see [Type Syntax in SQL](#type-syntax-in-sql) below. When a value is constructed
+via `ST_GeomFromWKB(wkb)` without an explicit SRID, the value's SRID is `0` (unspecified),
+while `ST_GeogFromWKB(wkb)` always returns a value with SRID 4326.
 
 #### When to use GEOMETRY vs GEOGRAPHY
 
@@ -113,16 +118,18 @@ When parsing WKB, Spark applies the following rules. Violations result in a pars
 
 ### Built-in Geospatial (ST) Functions
 
-Spark SQL provides scalar functions for working with GEOMETRY and GEOGRAPHY values. They are grouped under **st_funcs** in the [Built-in Functions](sql-ref-functions-builtin.html) API.
+Spark SQL provides scalar functions for working with GEOMETRY and GEOGRAPHY values. The full list,
+with detailed argument descriptions and examples, is on the
+[Built-in Functions](sql-ref-functions-builtin.html#geospatial-st-functions) page under
+**Geospatial ST Functions**. The functions provided in the current release are summarized here:
 
 | Function | Description |
 |----------|-------------|
-| `ST_AsBinary(geo)` | Returns the GEOMETRY or GEOGRAPHY value as WKB (BINARY). |
-| `ST_GeomFromWKB(wkb)` | Parses WKB and returns a GEOMETRY with default SRID 0. |
-| `ST_GeomFromWKB(wkb, srid)` | Parses WKB and returns a GEOMETRY with the given SRID. |
+| `ST_AsBinary(geo[, endianness])` | Returns the GEOMETRY or GEOGRAPHY value as WKB (BINARY). The optional `endianness` argument is `'NDR'` for little-endian (default) or `'XDR'` for big-endian. |
+| `ST_GeomFromWKB(wkb[, srid])` | Parses WKB and returns a GEOMETRY. The optional `srid` argument sets the SRID; if omitted, the SRID is `0`. |
 | `ST_GeogFromWKB(wkb)` | Parses WKB and returns a GEOGRAPHY with SRID 4326. |
 | `ST_Srid(geo)` | Returns the SRID of the GEOMETRY or GEOGRAPHY value (NULL if input is NULL). |
-| `ST_SetSrid(geo, srid)` | Returns a new GEOMETRY or GEOGRAPHY with the given SRID. |
+| `ST_SetSrid(geo, srid)` | Returns a new GEOMETRY or GEOGRAPHY with the given SRID. The new SRID must be valid for the value's type. |
 
 **Examples:**
 
@@ -130,6 +137,9 @@ Spark SQL provides scalar functions for working with GEOMETRY and GEOGRAPHY valu
 SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')));
 -- 0101000000000000000000F03F0000000000000040
 
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'));
+-- 00000000013FF00000000000004000000000000000
+
 SELECT ST_Srid(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'));
 -- 4326
 
@@ -139,9 +149,68 @@ SELECT ST_Srid(ST_SetSrid(ST_GeomFromWKB(X'0101000000000000000000F03F00000000000
 
 ### SRID and Stored Values
 
-* **Fixed-SRID columns**: Every value in the column must have the same SRID as the column type. Inserting a value with a different SRID can raise an error (or you can use `ST_SetSrid` to set the value’s SRID to match the column).
-* **Mixed-SRID columns** (`GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`): Values can have different SRIDs. Only valid SRIDs are allowed.
-* **Storage**: Parquet, Delta, and Iceberg store geometry/geography with a fixed SRID per column; mixed-SRID types are for in-memory/query use. When writing to these formats, a concrete (fixed) SRID is required.
+* **Fixed-SRID columns**: Every value in the column must have the same SRID as the column type. Inserting a value with a different SRID raises a `GEO_ENCODER_SRID_MISMATCH_ERROR`. Use `ST_SetSrid` to change a value's SRID to match the column.
+* **Mixed-SRID columns** (`GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`): Values can have different SRIDs per row. Each value must still have a valid SRID for the type; an invalid SRID raises `ST_INVALID_SRID_VALUE`.
+* **Storage**: Parquet, Delta, and Iceberg store geometry/geography with a fixed SRID per column. They do not support persisting `GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`; mixed-SRID types exist for in-memory/query use only.
+
+### Supported SRIDs
+
+Spark includes a pre-built SRID registry that combines coordinate systems from the PROJ database with OGC standard overrides. This registry enables validation and proper handling of coordinate systems for geospatial data.
+
+**SRID Compatibility Rules:**
+- **GEOMETRY** accepts all SRIDs in the registry (geographic + projected + SRID 0)
+- **GEOGRAPHY** only accepts geographic SRIDs (latitude/longitude coordinate systems)
+
+#### PROJ Version by Spark Release
+
+| Spark Version | PROJ Version |
+|---------------|--------------|
+| 4.2.0 | 9.8.1 |
+
+The SRID registry is pinned to the PROJ version shown above and is not synced live with external databases.
+
+#### OGC Standard Overrides
+
+Spark applies the following OGC standard overrides to specific SRIDs from the PROJ database:
+
+| SRID | PROJ CRS Identifier | OGC CRS Identifier | Description |
+|------|---------------------|-------------------|-------------|
+| 4326 | `EPSG:4326` | `OGC:CRS84` | WGS 84 (longitude/latitude order per OGC standard) |
+| 4267 | `EPSG:4267` | `OGC:CRS27` | NAD27 |
+| 4269 | `EPSG:4269` | `OGC:CRS83` | NAD83 |
+
+
+#### Commonly Used SRIDs
+
+| SRID | CRS Identifier | Name | CRS Type | Description |
+|------|----------------|------|----------|-------------|
+| 0 | `SRID:0` | Unspecified | Cartesian | Coordinates with no defined CRS (default for `ST_GeomFromWKB(wkb)`) |
+| 4326 | `OGC:CRS84` | WGS 84 | Geographic | World Geodetic System 1984 (longitude/latitude), GPS coordinates, global data (default for GEOGRAPHY) |
+| 4267 | `OGC:CRS27` | NAD27 | Geographic | North American Datum 1927 |
+| 4269 | `OGC:CRS83` | NAD83 | Geographic | North American Datum 1983 |
+| 3857 | `EPSG:3857` | Web Mercator | Projected | Pseudo-Mercator projection used by web mapping services |
+
+**Notes:**
+* `GEOMETRY(0)` means a fixed SRID of 0. For mixed per-row SRIDs, use `GEOMETRY(ANY)`.
+* [Parquet](https://github.com/apache/parquet-format/blob/master/Geospatial.md)
+  and [Iceberg](https://github.com/apache/iceberg/blob/main/format/spec.md) geospatial
+  specifications require a fixed SRID per column, so they do not support persisting
+  `GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`.
+
+#### SRID Validation
+
+**Invalid SRID (not in registry):**
+```sql
+SELECT ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 99999);
+-- Throws [ST_INVALID_SRID_VALUE]
+```
+
+**Projected SRID with GEOGRAPHY type:**
+```sql
+CREATE TABLE invalid_geo (id BIGINT, loc GEOGRAPHY(3857));
+-- Throws [ST_INVALID_SRID_VALUE] (3857 is projected, not geographic)
+```
+
 
 ### Data Types Reference
 
diff --git a/docs/sql-ref-identifier.md b/docs/sql-ref-identifier.md
index 7aca08ea9fd8d..b4a990c4111b3 100644
--- a/docs/sql-ref-identifier.md
+++ b/docs/sql-ref-identifier.md
@@ -52,6 +52,30 @@ An identifier is a string used to identify a database object such as a table, vi
 
     Any character from the character set. Use <code>`</code> to escape special characters (e.g., <code>`</code>).
 
+### Reserved system names
+
+`system`, `session`, and `builtin` have special meaning and should not be used as user-defined
+catalog or schema names.
+
+| Name | Position | Notes |
+| :--- | :------- | :---- |
+| `system` | catalog | Virtual catalog hosting `system.builtin` and `system.session`. Spark does not load `system` through the v2 catalog API; setting `spark.sql.catalog.system = ...` is unsupported and produces undefined results. The current catalog cannot be `system`. |
+| `builtin` | schema | A persistent schema named `builtin` is allowed but discouraged because it collides with `system.builtin`. |
+| `session` | schema | A persistent schema named `session` is allowed but discouraged because it collides with `system.session`. |
+
+A partially qualified 2-part reference like `builtin.x` or `session.x` walks a small **mini-path** to
+choose the implicit catalog: by default it resolves to `system.builtin.x` / `system.session.x`
+if such an object exists, and otherwise falls back to the same name in the current catalog. So
+an object in a persistent `builtin` or `session` schema is shadowed only when an object of the
+same name exists in the corresponding system namespace. The shadowed object stays reachable via its fully qualified 3-part name (for example
+`spark_catalog.session.x`). Set `spark.sql.legacy.persistentCatalogFirst` to `true` to reverse
+the preference: the current catalog is tried first and the system namespace becomes the fallback.
+
+The `system.builtin` and `system.session` namespaces are described in
+[SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). Temporary objects in `system.session` are
+documented under [CREATE VIEW](sql-ref-syntax-ddl-create-view.html) and
+[CREATE FUNCTION (SQL)](sql-ref-syntax-ddl-create-sql-function.html).
+
 ### Examples
 
 ```sql
diff --git a/docs/sql-ref-name-resolution.md b/docs/sql-ref-name-resolution.md
index 2532f05e164b3..3d574e58a9ad2 100644
--- a/docs/sql-ref-name-resolution.md
+++ b/docs/sql-ref-name-resolution.md
@@ -19,7 +19,7 @@ license: |
   limitations under the License.
 ---
 
-Name resolution is the process by which [identifiers](sql-ref-identifier.html) are resolved to specific column-, field-, parameter-, or table-references.
+Name resolution is the process by which [identifiers](sql-ref-identifier.html) are resolved to specific column-, field-, parameter-, table-, function-, or variable-references.
 
 ## Column, field, parameter, and variable resolution
 
@@ -50,7 +50,7 @@ In detail, resolution of identifiers to a specific reference follows these rules
 
    1. **Parameterless function reference**
 
-      If the identifier is unqualified and matches `current_user`, `current_date`, or `current_timestamp`: Resolve it as one of these functions.
+      If the identifier is unqualified and matches `current_user`, `current_date`, `current_time`, `current_timestamp`, or `current_path`: Resolve it as one of these functions.
 
    1. **Column DEFAULT specification**
 
@@ -137,7 +137,10 @@ In detail, resolution of identifiers to a specific reference follows these rules
 
 1. **Session Variables**
 
-   1. Match the identifier to a variable name. If the identifier is qualified, the qualifier must be `session` or `system.session`.
+   1. Match the identifier to a session variable name.
+      If the identifier is qualified, the qualifier must be `session` or `system.session`.
+      If the identifier is unqualified, `system.session` must be present on the
+      [SQL Path](sql-ref-syntax-aux-conf-mgmt-set-path.html) (the default path includes it).
    1. If the identifier is qualified, match to a field or map key of a variable following rule 1.c
 
 ### Limitations
@@ -256,37 +259,54 @@ This restriction also applies to parameter references in SQL functions.
   frm.a  lat.b  func.c
 ```
 
-## Table and view resolution
-
-An identifier in table-reference can be any one of the following:
+## Object name resolution
 
-- Persistent table or view
-- Common table expression (CTE)
-- [Temporary view](sql-ref-syntax-ddl-create-view.html)
+Tables, views, and functions follow the same resolution rule. It depends on how many parts the
+identifier has.
 
-Resolution of an identifier depends on whether it is qualified:
+### Fully qualified (3 parts) &mdash; `catalog.schema.object`
 
-- **Qualified**
+The reference is unique and is looked up in `catalog.schema`. `system.builtin.object` identifies
+a built-in function; `system.session.object` identifies a temporary view, function, or session
+variable.
 
-  If the identifier is fully qualified with three parts: `catalog.schema.relation`, it is unique.
+### Partially qualified (2 parts) &mdash; `schema.object`
 
-  If the identifier consists of two parts: `schema.relation`, it is further qualified with the result of `SELECT current_catalog()` to make it unique.
+The identifier is qualified with `current_catalog` &mdash; producing
+`current_catalog.schema.object` &mdash; unless the leading part is `session` (or `builtin`, for
+functions). In that case Spark uses the
+[mini-path](sql-ref-identifier.html#reserved-system-names) to choose the implicit catalog,
+returning the first match:
 
-- **Unqualified**
+| `spark.sql.legacy.persistentCatalogFirst` | Mini-path tried in order |
+| :-------------------------------------- | :----------------------- |
+| `false` (default) | the system namespace (`system.session.x` / `system.builtin.x`), then the current catalog's `session.x` / `builtin.x` |
+| `true` (legacy)   | the current catalog's `session.x` / `builtin.x`, then the system namespace (`system.session.x` / `system.builtin.x`) |
 
-  1. **Common table expression**
+### Unqualified (1 part) &mdash; `object`
 
-     If the reference is within the scope of a `WITH` clause, match the identifier to a CTE starting with the immediately containing `WITH` clause and moving outwards from there.
+In queries and DML, Spark walks the [SQL Path](sql-ref-syntax-aux-conf-mgmt-set-path.html) and
+returns the first match. In DDL, the identifier is qualified with `current_catalog.current_schema`.
 
-  1. **Temporary view**
+> Note: persistent views and SQL UDFs capture the SQL Path at `CREATE` time. When the view or
+> function is invoked, its body resolves names &mdash; tables, views, and functions &mdash;
+> against that frozen path, not the invoker's current path. `current_schema()` and
+> `current_path()` inside the body still return the invoker's context. See
+> [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html).
 
-     Match the identifier to any temporary view defined within the current session.
+## Table and view resolution
 
-  1. **Persisted table**
+A table reference can be a persistent table or view, a temporary view, or a common table
+expression (CTE).
 
-     Fully qualify the identifier by pre-pending the result of `SELECT current_catalog()` and `SELECT current_schema()` and look it up as a persistent relation.
+Resolution follows [Object name resolution](#object-name-resolution), with one addition for
+unqualified references: when the reference is inside a `WITH` clause, Spark first matches the
+identifier against CTEs from the innermost `WITH` outward. If no CTE matches, Spark walks the
+SQL Path.
 
-If the relation cannot be resolved to any table, view, or CTE, Databricks raises a TABLE_OR_VIEW_NOT_FOUND error.
+If the relation cannot be resolved, Spark raises `TABLE_OR_VIEW_NOT_FOUND`. The error includes
+the effective search path, for example
+`searchPath = [system.builtin, system.session, spark_catalog.default]`.
 
 ### Examples
 
@@ -317,7 +337,13 @@ If the relation cannot be resolved to any table, view, or CTE, Databricks raises
 > SELECT c1 FROM rel;
  2
 
--- Temporary views cannot be qualified, so qualifiecation resolved to the table:
+-- A temporary view can be qualified with `session` or `system.session`:
+> SELECT c1 FROM session.rel;
+ 2
+> SELECT c1 FROM system.session.rel;
+ 2
+
+-- Other 2-part qualifications resolve to the persisted table:
 > SELECT c1 FROM default.rel;
  1
 
@@ -343,45 +369,34 @@ If the relation cannot be resolved to any table, view, or CTE, Databricks raises
                    SELECT 1),
                 cte;
   [TABLE_OR_VIEW_NOT_FOUND] The table or view `cte` cannot be found.
-```
-
-## Function resolution
-
-A function reference is recognized by the mandatory trailing set of parentheses.
-
-It can resolve to:
-
-- A builtin function provided by Spark,
-- A temporary user defined function scoped to the current session, or
-- A persistent user defined function.
 
-Resolution of a function name depends on whether it is qualified:
+-- PATH drives unqualified relation lookup order
+> CREATE SCHEMA db_a;
+> CREATE SCHEMA db_b;
+> CREATE TABLE db_a.t USING parquet AS SELECT 1 AS v;
+> CREATE TABLE db_b.t USING parquet AS SELECT 2 AS v;
 
-- **Qualified**
-
-  If the name is fully qualified with three parts: `catalog.schema.function`, it is unique.
-
-  If the name consists of two parts: `schema.function`, it is further qualified with the result of `SELECT current_catalog()` to make it unique.
-
-  The function is then looked up in the catalog.
-
-- **Unqualified**
-
-  For unqualified function names Spark follows a fixed order of precedence (`PATH`):
-
-  1. **Builtin function**
-
-     If a function by this name exists among the set of built-in functions, that function is chosen.
+> SET PATH = spark_catalog.db_a, spark_catalog.db_b, system.builtin;
+> SELECT v FROM t;
+ 1
 
-  1. **Temporary function**
+> SET PATH = spark_catalog.db_b, spark_catalog.db_a, system.builtin;
+> SELECT v FROM t;
+ 2
 
-     If a function by this name exists among the set of temporary functions, that function is chosen.
+-- Three-part `system.session.x` references the temporary scope only:
+> SELECT * FROM system.session.no_such_view;
+  [TABLE_OR_VIEW_NOT_FOUND] ... `system`.`session`.`no_such_view` ...
+```
 
-  1. **Persisted function**
+## Function resolution
 
-     Fully qualify the function name by pre-pending the result of `SELECT current_catalog()` and `SELECT current_schema()` and look it up as a persistent function.
+A function reference is recognized by the trailing parentheses, and follows
+[Object name resolution](#object-name-resolution).
 
-If the function cannot be resolved Spark raises an `UNRESOLVED_ROUTINE` error.
+If the function cannot be resolved, Spark raises `UNRESOLVED_ROUTINE`. The error includes the
+effective search path, for example
+`searchPath = [system.builtin, system.session, spark_catalog.default]`.
 
 ### Examples
 
@@ -420,4 +435,45 @@ If the function cannot be resolved Spark raises an `UNRESOLVED_ROUTINE` error.
 -- To resolve the persistent function it now needs qualification
 > SELECT spark_catalog.default.func(4, 3);
  6
+
+-- A built-in can always be reached by qualification, even when shadowed.
+-- Put system.session ahead of system.builtin so a matching temp `abs` shadows the built-in.
+> SET PATH = system.session, system.builtin, spark_catalog.default;
+> CREATE TEMPORARY FUNCTION abs(x INT) RETURNS INT RETURN x + 100;
+
+-- Unqualified abs(-5) resolves to the temp (-5 + 100 = 95).
+> SELECT abs(-5);
+ 95
+
+-- system.builtin.abs and builtin.abs reach the built-in around the shadow.
+> SELECT system.builtin.abs(-5);
+ 5
+> SELECT builtin.abs(-5);
+ 5
+
+-- session.abs reaches the temp explicitly.
+> SELECT session.abs(-5);
+ 95
+
+> DROP TEMPORARY FUNCTION abs;
+> SET PATH = DEFAULT_PATH;
+
+-- PATH controls unqualified routine lookup order
+> CREATE SCHEMA path_a;
+> CREATE SCHEMA path_b;
+> CREATE FUNCTION path_a.pick() RETURNS INT RETURN 10;
+> CREATE FUNCTION path_b.pick() RETURNS INT RETURN 20;
+
+> SET PATH = spark_catalog.path_a, spark_catalog.path_b, system.builtin;
+> SELECT pick();
+ 10
+
+> SET PATH = spark_catalog.path_b, spark_catalog.path_a, system.builtin;
+> SELECT pick();
+ 20
+
+-- Unresolved routine lists the effective search path
+> SET PATH = spark_catalog.default, system.builtin;
+> SELECT does_not_exist();
+  [UNRESOLVED_ROUTINE] ... searchPath: [`spark_catalog`.`default`, `system`.`builtin`] ...
 ```
diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-set-path.md b/docs/sql-ref-syntax-aux-conf-mgmt-set-path.md
new file mode 100644
index 0000000000000..64e698fa193ae
--- /dev/null
+++ b/docs/sql-ref-syntax-aux-conf-mgmt-set-path.md
@@ -0,0 +1,248 @@
+---
+layout: global
+title: SET PATH
+displayTitle: SET PATH
+license: |
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+---
+
+### Description
+
+`SET PATH` changes the **SQL Path** of the current session.
+
+The SQL Path is an ordered list of catalog-qualified schema names that Spark walks when
+resolving unqualified references to functions, tables, views, and session variables in queries
+and DML (`SELECT`, `INSERT`, `UPDATE`, `DELETE`, `MERGE`). The first match wins. DDL
+(`CREATE TABLE`, `CREATE VIEW`, `CREATE FUNCTION`, `DROP`, `ALTER`, ...) resolves unqualified
+object names against `current_catalog.current_schema`, not the path; so `CREATE TABLE t` always
+creates `t` in the current schema regardless of the path.
+
+The path can include two virtual namespaces in the `system` catalog:
+
+- `system.builtin` &mdash; built-in functions, including those injected by
+  `SparkSessionExtensions`.
+- `system.session` &mdash; temporary views, temporary functions, and session variables in the
+  current session.
+
+`SET PATH` is controlled by `spark.sql.path.enabled`. When it is `false` (the default),
+`SET PATH` raises `UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED`. Unqualified resolution and
+[`current_path()`](sql-ref-function-current-path.html) still use the default path.
+
+The initial value of `PATH` in a session is `DEFAULT_PATH`. `DEFAULT_PATH` is either the value of
+`spark.sql.defaultPath`, or, when that configuration is empty, a built-in value composed of
+`system.builtin`, `system.session`, and the current schema. To override, set
+`spark.sql.defaultPath`. See the [`DEFAULT_PATH` parameter](#parameters) for the exact derivation
+rules.
+
+The effect of `SET PATH` is scoped to the current session and is lost when the session ends. To
+re-apply the current default path mid-session, run `SET PATH = DEFAULT_PATH`. (This stores a
+snapshot of `DEFAULT_PATH` at the moment of the statement; later changes to
+`spark.sql.defaultPath` are not picked up automatically.) Cloned sessions inherit the parent's
+path at clone time; later changes in the child do not propagate back.
+
+Persistent views and SQL UDFs capture the path at `CREATE` time into the object's metadata.
+Each invocation resolves the body against that frozen path, not the invoker's current path;
+`current_schema()` and `current_path()` inside the body still return the invoker's context.
+
+The leading names `session` and `builtin` have special meaning in 2-part references; see
+[Reserved system names](sql-ref-identifier.html#reserved-system-names).
+
+### Syntax
+
+```sql
+SET PATH = path_element [ , ... ]
+
+path_element
+    { DEFAULT_PATH |
+      SYSTEM_PATH |
+      PATH |
+      CURRENT_SCHEMA |
+      CURRENT_DATABASE |
+      catalog_name . namespace [ . namespace ... ] }
+```
+
+### Parameters
+
+* **`DEFAULT_PATH`**
+
+  Expands to the session's default path. The default path has two layers:
+
+  1. If `spark.sql.defaultPath` is set to a non-empty value, that value is parsed using the same
+     grammar as `SET PATH` (with one restriction: the `PATH` keyword is not allowed inside the
+     conf value, since it would be self-referential).
+
+     The conf value is validated for syntax at the time it is set; an invalid value is rejected.
+     Static duplicates inside the conf are tolerated (unlike interactive `SET PATH`, which
+     rejects them) so a later `USE SCHEMA` cannot turn a previously valid default into a runtime
+     error. A `DEFAULT_PATH` token inside the conf value resolves to the spark-built-in default
+     below to avoid a cycle, rather than recursing.
+
+  2. If `spark.sql.defaultPath` is empty (the factory setting), the spark-built-in default
+     applies: `system.builtin`, `system.session`, and the current schema
+     (`current_catalog.current_schema`), in that order.
+
+  To change the default path, set `spark.sql.defaultPath` via any of the usual mechanisms
+  (`SET spark.sql.defaultPath = ...` at runtime, `--conf` on `spark-submit`, `SparkConf`, or
+  `spark-defaults.conf`); clear it with `RESET spark.sql.defaultPath` to return to the
+  spark-built-in default.
+
+* **`SYSTEM_PATH`**
+
+  Expands to the system-managed namespaces under the `system` catalog. Today this is just
+  `system.builtin`, but it is reserved for future system-managed schemas (for example, hosting
+  built-in AI, geospatial, or ML functions).
+
+* **`PATH`**
+
+  Expands to the **current** value of the SQL Path. Useful for appending entries without
+  re-typing them, for example `SET PATH = PATH, spark_catalog.analytics`.
+  `PATH` is not allowed in the value of `spark.sql.defaultPath` (it would create a cycle).
+
+* **`CURRENT_SCHEMA`** / **`CURRENT_DATABASE`**
+
+  A virtual marker that resolves to the catalog-qualified current schema
+  (`current_catalog.current_schema`) every time the path is consulted. This means subsequent
+  `USE SCHEMA` statements are picked up without re-issuing `SET PATH`.
+  `CURRENT_DATABASE` is a synonym for `CURRENT_SCHEMA`.
+
+* **`catalog_name . namespace [ . namespace ... ]`**
+
+  An explicit catalog-qualified namespace reference (`catalog.schema` or, for catalogs with
+  multi-level namespaces, `catalog.ns1.ns2...`). At least two parts are required.
+  The catalog and namespace do not need to exist at the time of `SET PATH`; non-existent entries
+  are silently skipped during name resolution.
+
+  Identifier quoting follows the usual rules. Backtick-quoted parts that contain a dot are
+  preserved, for example ``spark_catalog.`sch.b` ``.
+
+### Semantics
+
+* Setting the path takes effect immediately.
+* Identifier case is preserved in storage and in `current_path()` output.
+* Duplicate entries are detected after expansion and raise `DUPLICATE_SQL_PATH_ENTRY`.
+  Comparisons honor the session's case sensitivity setting. Because `CURRENT_DATABASE` is an
+  alias for `CURRENT_SCHEMA`, listing both is flagged as a duplicate.
+
+### Error conditions
+
+| Condition | Cause |
+| :-------- | :---- |
+| `UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED` | `SET PATH` was issued while `spark.sql.path.enabled` is `false`. |
+| `INVALID_SQL_PATH_SCHEMA_REFERENCE` | An entry with fewer than two parts was given. |
+| `DUPLICATE_SQL_PATH_ENTRY` | Two entries collapsed to the same concrete namespace after expansion. |
+
+### Examples
+
+```sql
+-- Enable the feature first; the default is false.
+> SET spark.sql.path.enabled = true;
+
+-- Observe the default path.
+> SELECT current_path();
+ system.builtin,system.session,spark_catalog.default
+
+-- Replace the path with explicit entries.
+> SET PATH = spark_catalog.default, system.builtin;
+> SELECT current_path();
+ spark_catalog.default,system.builtin
+
+-- Identifier case is preserved.
+> SET PATH = Spark_Catalog.Default, System.Builtin;
+> SELECT current_path();
+ Spark_Catalog.Default,System.Builtin
+
+-- Backtick-quoted parts that contain a dot round-trip with quoting.
+> SET PATH = spark_catalog.`sch.b`, system.builtin;
+> SELECT current_path();
+ spark_catalog.`sch.b`,system.builtin
+
+-- DEFAULT_PATH and SYSTEM_PATH shortcuts.
+> SET PATH = DEFAULT_PATH;
+> SELECT current_path();
+ system.builtin,system.session,spark_catalog.default
+> SET PATH = SYSTEM_PATH;
+> SELECT current_path();
+ system.builtin
+
+-- SYSTEM_PATH composes naturally with the working schema.
+> SET PATH = SYSTEM_PATH, CURRENT_SCHEMA;
+> SELECT current_path();
+ system.builtin,spark_catalog.default
+
+-- Append an entry by referring to the current path.
+> SET PATH = spark_catalog.default, system.builtin;
+> SET PATH = PATH, spark_catalog.analytics;
+> SELECT current_path();
+ spark_catalog.default,system.builtin,spark_catalog.analytics
+
+-- CURRENT_SCHEMA is re-evaluated each time; USE SCHEMA updates the effective path.
+> SET PATH = CURRENT_SCHEMA, system.builtin;
+> USE spark_catalog.finance;
+> SELECT current_path();
+ spark_catalog.finance,system.builtin
+> USE spark_catalog.default;
+> SELECT current_path();
+ spark_catalog.default,system.builtin
+
+-- DEFAULT_PATH can be customized via the conf.
+> SET spark.sql.defaultPath = system.session, system.builtin, current_schema;
+> SET PATH = DEFAULT_PATH;
+> SELECT current_path();
+ system.session,system.builtin,spark_catalog.default
+> RESET spark.sql.defaultPath;
+
+-- Append a schema of shared UDFs so callers do not have to qualify them.
+> CREATE SCHEMA spark_catalog.shared_udfs;
+> CREATE FUNCTION spark_catalog.shared_udfs.to_iso_date(d DATE) RETURNS STRING
+    RETURN date_format(d, 'yyyy-MM-dd');
+> SET PATH = PATH, spark_catalog.shared_udfs;
+> SELECT to_iso_date(DATE'2026-05-22');
+ 2026-05-22
+
+-- Drop system.session from the path to force temporary objects to be qualified explicitly.
+> CREATE TEMPORARY FUNCTION revenue() RETURNS INT RETURN 42;
+> SELECT revenue();                  -- resolves via the default path
+ 42
+> SET PATH = system.builtin, current_schema;
+> SELECT revenue();                  -- now must be qualified
+ [UNRESOLVED_ROUTINE] `revenue` ...
+> SELECT session.revenue();
+ 42
+
+-- Error cases.
+> SET PATH = spark_catalog.default, spark_catalog.default;
+  [DUPLICATE_SQL_PATH_ENTRY]
+
+> SET PATH = my_schema_no_catalog;
+  [INVALID_SQL_PATH_SCHEMA_REFERENCE]
+
+-- PATH is rejected as a value of the DEFAULT_PATH conf (would cycle).
+> SET spark.sql.defaultPath = PATH, system.builtin;
+  [Error: invalid value]
+
+-- SET PATH is rejected when the feature is disabled.
+> SET spark.sql.path.enabled = false;
+> SET PATH = spark_catalog.default;
+  [UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED]
+```
+
+### Related Statements
+
+* [Name Resolution](sql-ref-name-resolution.html)
+* [`current_path` function](sql-ref-function-current-path.html)
+* [SET](sql-ref-syntax-aux-conf-mgmt-set.html)
+* [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html)
+* [USE DATABASE](sql-ref-syntax-ddl-usedb.html)
diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-set.md b/docs/sql-ref-syntax-aux-conf-mgmt-set.md
index 9e57a221f9688..396559ca48e74 100644
--- a/docs/sql-ref-syntax-aux-conf-mgmt-set.md
+++ b/docs/sql-ref-syntax-aux-conf-mgmt-set.md
@@ -25,6 +25,8 @@ The SET command sets a property, returns the value of an existing property or re
 
 To set SQL variables defined with [DECLARE VARIABLE](sql-ref-syntax-ddl-declare-variable.html) use [SET VAR](sql-ref-syntax-aux-set-var.html).
 
+To change the session SQL Path used for unqualified name resolution use [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html).
+
 ### Syntax
 
 ```sql
@@ -72,3 +74,4 @@ SET spark.sql.variable.substitute;
 
 * [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html)
 * [SET VAR](sql-ref-syntax-aux-set-var.html)
+* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html)
diff --git a/docs/sql-ref-syntax-aux-conf-mgmt.md b/docs/sql-ref-syntax-aux-conf-mgmt.md
index 3312bcb503500..6b809d4a94655 100644
--- a/docs/sql-ref-syntax-aux-conf-mgmt.md
+++ b/docs/sql-ref-syntax-aux-conf-mgmt.md
@@ -22,3 +22,4 @@ license: |
  * [SET](sql-ref-syntax-aux-conf-mgmt-set.html)
  * [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html)
  * [SET TIME ZONE](sql-ref-syntax-aux-conf-mgmt-set-timezone.html)
+ * [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html)
diff --git a/docs/sql-ref-syntax-aux-describe-function.md b/docs/sql-ref-syntax-aux-describe-function.md
index 0c5a3d751a564..2da1b9466fc23 100644
--- a/docs/sql-ref-syntax-aux-describe-function.md
+++ b/docs/sql-ref-syntax-aux-describe-function.md
@@ -22,9 +22,15 @@ license: |
 ### Description
 
 `DESCRIBE FUNCTION` statement returns the basic metadata information of an
-existing function. The metadata information includes the function name, implementing
-class and the usage details.  If the optional `EXTENDED` option is specified, the basic
-metadata information is returned along with the extended usage information.
+existing function. For built-in and external (Java/Hive) functions the output includes the
+function name, implementing class, and usage details. For
+[SQL user-defined functions](sql-ref-syntax-ddl-create-sql-function.html) the output describes
+the function signature (input parameters, return type/columns) and, with `EXTENDED`, the
+function body, characteristics, and the frozen
+[SQL Path](sql-ref-syntax-aux-conf-mgmt-set-path.html) that was captured at creation time.
+
+If the optional `EXTENDED` option is specified, the basic metadata is returned along with the
+extended information.
 
 ### Syntax
 
@@ -36,12 +42,14 @@ metadata information is returned along with the extended usage information.
 
 * **function_name**
 
-    Specifies a name of an existing function in the system. The function name may be
-    optionally qualified with a database name. If `function_name` is qualified with
-    a database then the function is resolved from the user specified database, otherwise
-    it is resolved from the current database.
+    Specifies a name of an existing function. The function name follows the regular
+    [name resolution](sql-ref-name-resolution.html#function-resolution) rules: unqualified
+    names walk the SQL Path; 3-part names target the chosen `catalog.schema` directly
+    (including the system namespaces `system.builtin` and `system.session`); 2-part names that
+    lead with `builtin` or `session` follow a mini-path across the system namespace and the
+    current catalog.
 
-    **Syntax:** `[ database_name. ] function_name`
+    **Syntax:** `[ catalog_name. ] [ database_name. ] function_name`
 
 ### Examples
 
@@ -102,6 +110,70 @@ DESC FUNCTION EXTENDED explode;
 |       10                                                      |
 |       20                                                      |
 +---------------------------------------------------------------+
+
+-- Built-in functions can be qualified with `builtin` or `system.builtin`.
+DESC FUNCTION system.builtin.abs;
++-------------------------------------------------------------------+
+|function_desc                                                      |
++-------------------------------------------------------------------+
+|Function: abs                                                      |
+|Class: org.apache.spark.sql.catalyst.expressions.Abs               |
+|Usage: abs(expr) - Returns the absolute value of the numeric value.|
++-------------------------------------------------------------------+
+
+-- Describe a SQL scalar UDF: the output uses the SQL function layout
+-- (Function / Type / Input / Returns).
+CREATE FUNCTION area(x DOUBLE, y DOUBLE) RETURNS DOUBLE RETURN x * y;
+DESC FUNCTION area;
++-------------------------------+
+|function_desc                  |
++-------------------------------+
+|Function: spark_catalog.default.area|
+|Type:     SCALAR               |
+|Input:    x DOUBLE             |
+|          y DOUBLE             |
+|Returns:  DOUBLE               |
++-------------------------------+
+
+-- Describe a SQL table UDF.
+CREATE FUNCTION getemps(deptno INT)
+  RETURNS TABLE (id INT, name STRING)
+  RETURN SELECT id, name FROM employee WHERE employee.deptno = getemps.deptno;
+DESC FUNCTION getemps;
++--------------------------------------+
+|function_desc                         |
++--------------------------------------+
+|Function: spark_catalog.default.getemps|
+|Type:     TABLE                       |
+|Input:    deptno INT                  |
+|Returns:  id   INT                    |
+|          name STRING                 |
++--------------------------------------+
+
+-- DESC FUNCTION EXTENDED for a SQL UDF adds the body, the characteristic clauses,
+-- the captured SQL configs, the owner, the create time, and the frozen SQL Path.
+SET PATH = spark_catalog.default, system.builtin;
+CREATE FUNCTION frozen_fn() RETURNS INT
+  COMMENT 'demo function'
+  RETURN (SELECT MAX(id) FROM frozen_t);
+DESC FUNCTION EXTENDED frozen_fn;
++-----------------------------------------------------------------+
+|function_desc                                                    |
++-----------------------------------------------------------------+
+|Function:     spark_catalog.default.frozen_fn                    |
+|Type:         SCALAR                                             |
+|Input:        ()                                                 |
+|Returns:      INT                                                |
+|Comment:      demo function                                      |
+|Deterministic:false                                              |
+|Data Access:  READS SQL DATA                                     |
+|Configs:      spark.sql.ansi.enabled=true                        |
+|              ...                                                |
+|Owner:        <USER>                                             |
+|Create Time:  Wed Apr 30 14:05:43 PDT 2026                       |
+|Body:         (SELECT MAX(id) FROM frozen_t)                     |
+|SQL Path:     spark_catalog.default, system.builtin              |
++-----------------------------------------------------------------+
 ```
 
 ### Related Statements
@@ -109,3 +181,5 @@ DESC FUNCTION EXTENDED explode;
 * [DESCRIBE DATABASE](sql-ref-syntax-aux-describe-database.html)
 * [DESCRIBE TABLE](sql-ref-syntax-aux-describe-table.html)
 * [DESCRIBE QUERY](sql-ref-syntax-aux-describe-query.html)
+* [CREATE FUNCTION (SQL)](sql-ref-syntax-ddl-create-sql-function.html)
+* [Name Resolution](sql-ref-name-resolution.html)
diff --git a/docs/sql-ref-syntax-aux-describe-table.md b/docs/sql-ref-syntax-aux-describe-table.md
index 46d9432f5d072..cb84b0c7fefb2 100644
--- a/docs/sql-ref-syntax-aux-describe-table.md
+++ b/docs/sql-ref-syntax-aux-describe-table.md
@@ -105,6 +105,10 @@ to return the metadata pertaining to a partition or column respectively.
       "view_schema_mode": "<view_schema_mode>",
       "view_catalog_and_namespace": "<view_catalog_and_namespace>",
       "view_query_output_columns": ["col1", "col2"],
+      // SQL Path captured at the time of permanent view creation
+      "sql_path": [
+        {"catalog_name": "<catalog_name>", "namespace": ["<namespace>"]}
+      ],
       // Spark SQL configurations captured at the time of permanent view creation
       "view_creation_spark_configuration": {
         "conf1": "<value1>",
@@ -272,8 +276,83 @@ DESCRIBE customer salesdb.customer.name;
 +---------+----------+
 
 -- Returns the table metadata in JSON format.
+-- (Formatted for readability; the actual output is on a single line.)
 DESC FORMATTED customer AS JSON;
-{"table_name":"customer","catalog_name":"spark_catalog","schema_name":"default","namespace":["default"],"columns":[{"name":"cust_id","type":{"name":"integer"},"nullable":true},{"name":"name","type":{"name":"string"},"comment":"Short name","nullable":true},{"name":"state","type":{"name":"varchar","length":20},"nullable":true}],"location": "file:/tmp/salesdb.db/custom...","created_time":"2020-04-07T14:05:43Z","last_access":"UNKNOWN","created_by":"None","type":"MANAGED","provider":"parquet","partition_provider":"Catalog","partition_columns":["state"]}
+{
+  "table_name": "customer",
+  "catalog_name": "spark_catalog",
+  "schema_name": "default",
+  "namespace": ["default"],
+  "columns": [
+    {"name": "cust_id", "type": {"name": "int"}, "nullable": true},
+    {"name": "name", "type": {"name": "string"}, "comment": "Short name", "nullable": true},
+    {"name": "state", "type": {"name": "varchar", "length": 20}, "nullable": true}
+  ],
+  "location": "file:/tmp/salesdb.db/custom...",
+  "created_time": "2020-04-07T14:05:43Z",
+  "last_access": "UNKNOWN",
+  "created_by": "None",
+  "type": "MANAGED",
+  "provider": "parquet",
+  "partition_provider": "Catalog",
+  "partition_columns": ["state"]
+}
+
+-- DESCRIBE EXTENDED on a view emits view-specific rows.
+SET PATH = spark_catalog.default, system.builtin;
+CREATE VIEW recent_customers AS
+    SELECT cust_id, name FROM customer WHERE cust_id > 1000;
+
+DESCRIBE EXTENDED recent_customers;
++----------------------------+---------------------------------------+--------+
+|                    col_name|                              data_type| comment|
++----------------------------+---------------------------------------+--------+
+|                     cust_id|                                    int|    null|
+|                        name|                                 string|    null|
+|                            |                                       |        |
+|# Detailed Table Information|                                       |        |
+|                    Catalog |                          spark_catalog|        |
+|                    Database|                                default|        |
+|                       Table|                       recent_customers|        |
+|                        Type|                                   VIEW|        |
+|                   View Text|SELECT cust_id, name FROM customer ... |        |
+|          View Original Text|SELECT cust_id, name FROM customer ... |        |
+|            View Schema Mode|                           COMPENSATION|        |
+| View Catalog and Namespace|                spark_catalog.default   |        |
+|   View Query Output Columns|                   [`cust_id`, `name`]  |        |
+|                    SQL Path|   spark_catalog.default, system.builtin|        |
++----------------------------+---------------------------------------+--------+
+
+-- The same metadata in JSON form.
+-- (Formatted for readability; the actual output is on a single line.)
+DESCRIBE EXTENDED recent_customers AS JSON;
+{
+  "table_name": "recent_customers",
+  "catalog_name": "spark_catalog",
+  "schema_name": "default",
+  "namespace": ["default"],
+  "columns": [
+    {"name": "cust_id", "type": {"name": "int"}, "nullable": true},
+    {"name": "name", "type": {"name": "string", "collation": "UTF8_BINARY"}, "nullable": true}
+  ],
+  "created_time": "2026-05-22T10:00:00Z",
+  "last_access": "UNKNOWN",
+  "created_by": "Spark 4.2.0",
+  "type": "VIEW",
+  "collation": "UTF8_BINARY",
+  "view_text": "SELECT cust_id, name FROM customer WHERE cust_id > 1000",
+  "view_original_text": "SELECT cust_id, name FROM customer WHERE cust_id > 1000",
+  "view_schema_mode": "COMPENSATION",
+  "view_catalog_and_namespace": "spark_catalog.default",
+  "view_query_output_columns": ["cust_id", "name"],
+  "sql_path": [
+    {"catalog_name": "spark_catalog", "namespace": ["default"]},
+    {"catalog_name": "system", "namespace": ["builtin"]}
+  ],
+  "view_creation_spark_configuration": {
+    "spark.sql.ansi.enabled": "true"
+  }
+}
 ```
 
 ### Related Statements
@@ -281,3 +360,4 @@ DESC FORMATTED customer AS JSON;
 * [DESCRIBE DATABASE](sql-ref-syntax-aux-describe-database.html)
 * [DESCRIBE QUERY](sql-ref-syntax-aux-describe-query.html)
 * [DESCRIBE FUNCTION](sql-ref-syntax-aux-describe-function.html)
+* [Name Resolution](sql-ref-name-resolution.html)
diff --git a/docs/sql-ref-syntax-ddl-create-database.md b/docs/sql-ref-syntax-ddl-create-database.md
index 9d8bf47844724..9125ca78dc9ee 100644
--- a/docs/sql-ref-syntax-ddl-create-database.md
+++ b/docs/sql-ref-syntax-ddl-create-database.md
@@ -38,6 +38,9 @@ CREATE { DATABASE | SCHEMA } [ IF NOT EXISTS ] database_name
 
     Specifies the name of the database to be created.
 
+    > Note: avoid naming a database `session` or `builtin`; see
+    > [Reserved system names](sql-ref-identifier.html#reserved-system-names).
+
 * **IF NOT EXISTS**
 
     Creates a database with the given name if it does not exist. If a database with the same name already exists, nothing will happen.
@@ -85,3 +88,4 @@ DESCRIBE DATABASE EXTENDED customer_db;
 
 * [DESCRIBE DATABASE](sql-ref-syntax-aux-describe-database.html)
 * [DROP DATABASE](sql-ref-syntax-ddl-drop-database.html)
+* [Name Resolution](sql-ref-name-resolution.html)
diff --git a/docs/sql-ref-syntax-ddl-create-function.md b/docs/sql-ref-syntax-ddl-create-function.md
index e0e2545f5ee3f..2565870494410 100644
--- a/docs/sql-ref-syntax-ddl-create-function.md
+++ b/docs/sql-ref-syntax-ddl-create-function.md
@@ -50,8 +50,9 @@ CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ]
 * **TEMPORARY**
 
     Indicates the scope of function being created. When `TEMPORARY` is specified, the
-    created function is valid and visible in the current session. No persistent
-    entry is made in the catalog for these kind of functions.
+    created function is valid and visible in the current session. Temporary functions live in the
+    per-session `system.session` namespace. No persistent entry is made in the catalog for these
+    kind of functions.
 
 * **IF NOT EXISTS**
 
@@ -62,9 +63,19 @@ CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ]
 
 * **function_name**
 
-    Specifies a name of function to be created. The function name may be optionally qualified with a database name.
+    Specifies a name of function to be created.
 
-    **Syntax:** `[ database_name. ] function_name`
+    * For a **permanent** function the name may be optionally qualified with a database name
+      (or a catalog and database). If the name is not qualified the function is created in the
+      current schema.
+
+      **Syntax:** `[ catalog_name. ] [ database_name. ] function_name`
+
+    * For a **temporary** function the name may be optionally qualified with the session schema
+      (`session` or `system.session`). Any other qualifier is rejected with
+      `INVALID_TEMP_OBJ_QUALIFIER`.
+
+      **Syntax:** `[ { session | system.session } . ] function_name`
 
 * **class_name**
 
diff --git a/docs/sql-ref-syntax-ddl-create-sql-function.md b/docs/sql-ref-syntax-ddl-create-sql-function.md
index 649cd895a1974..19f3e120f070f 100644
--- a/docs/sql-ref-syntax-ddl-create-sql-function.md
+++ b/docs/sql-ref-syntax-ddl-create-sql-function.md
@@ -58,7 +58,10 @@ characteristic
 
 - **TEMPORARY**
 
-  The scope of the function being created. When you specify `TEMPORARY`, the created function is valid and visible in the current session. No persistent entry is made in the catalog.
+  The scope of the function being created. When you specify `TEMPORARY`, the created function is
+  valid and visible in the current session. Temporary functions live in the per-session
+  `system.session` namespace and are dropped when the session ends. No persistent entry is made in
+  the catalog.
 
 - **IF NOT EXISTS**
 
@@ -66,10 +69,23 @@ characteristic
 
 - **function_name**
 
-  A name for the function. For a permanent function, you can optionally qualify the function name, or it will be created under the current catalog and namespace.
-  If the name is not qualified the permanent function is created in the current schema.
+  A name for the function.
 
-  **Syntax:** `[ database_name. ] function_name`
+  * For a **permanent** function, you can optionally qualify the function name with a database name
+    (or a catalog and database). If the name is not qualified the permanent function is created in
+    the current schema.
+
+    **Syntax:** `[ catalog_name. ] [ database_name. ] function_name`
+
+  * For a **temporary** function, you can optionally qualify the function name with the session
+    schema (`session` or `system.session`). Any other qualifier &mdash; including
+    `system.builtin`, the current schema, or an arbitrary database name &mdash; is rejected with
+    `INVALID_TEMP_OBJ_QUALIFIER`. For example, `CREATE TEMPORARY FUNCTION session.f ...` and
+    `CREATE TEMPORARY FUNCTION system.session.f ...` are accepted.
+
+    **Syntax:** `[ { session | system.session } . ] function_name`
+
+  The function name must be unique among all routines (procedures and functions) in its schema.
 
 - **function_parameter**
 
@@ -126,6 +142,15 @@ characteristic
   - [Ranking functions](sql-ref-functions-builtin.md#ranking-window-functions)
   - Row producing functions such as `explode`
 
+  A persistent SQL UDF cannot reference temporary views, temporary functions, or session
+  variables.
+
+  The SQL Path in effect at `CREATE FUNCTION` time is captured into the function's metadata; the
+  body resolves against that frozen path on every invocation, not the invoker's current path.
+  `current_schema()` and `current_path()` inside the body still return the invoker's context.
+  Use [DESCRIBE FUNCTION EXTENDED](sql-ref-syntax-aux-describe-function.html) to inspect the
+  captured path. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html).
+
   Within the body of the function you can refer to parameter by its unqualified name or by qualifying the parameter with the function name.
 
 - **characteristic**
@@ -296,8 +321,74 @@ characteristic
  Returns:  INT
 ```
 
+### Create a temporary SQL function with a session qualifier
+
+```sql
+-- Unqualified, `session`-qualified, and `system.session`-qualified names all create the same
+-- temporary function in the per-session `system.session` namespace.
+> CREATE TEMPORARY FUNCTION add_one(x INT) RETURNS INT RETURN x + 1;
+
+> CREATE OR REPLACE TEMPORARY FUNCTION session.add_one(x INT) RETURNS INT
+    RETURN x + 1;
+
+> CREATE OR REPLACE TEMPORARY FUNCTION system.session.add_one(x INT) RETURNS INT
+    RETURN x + 1;
+
+-- All three names refer to the same temporary function:
+> SELECT add_one(1), session.add_one(1), system.session.add_one(1);
+ 2  2  2
+
+-- DROP TEMPORARY FUNCTION accepts the same qualifiers:
+> DROP TEMPORARY FUNCTION session.add_one;
+
+-- Any other qualifier on a TEMPORARY function is rejected.
+> CREATE TEMPORARY FUNCTION mydb.bad_temp() RETURNS INT RETURN 1;
+  [INVALID_TEMP_OBJ_QUALIFIER] qualifier `mydb` is not allowed for temporary FUNCTION ...
+
+> CREATE TEMPORARY FUNCTION system.builtin.bad_temp() RETURNS INT RETURN 1;
+  [INVALID_TEMP_OBJ_QUALIFIER] qualifier `system`.`builtin` is not allowed for temporary FUNCTION ...
+```
+
+### Frozen SQL Path
+
+A SQL UDF captures the SQL Path that is in effect at `CREATE FUNCTION` time. The body resolves
+against that frozen path on every invocation, even if the caller's session has set a different
+PATH. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html).
+
+```sql
+> CREATE SCHEMA path_a;
+> CREATE SCHEMA path_b;
+> CREATE TABLE path_a.t USING parquet AS SELECT 10 AS id;
+> CREATE TABLE path_b.t USING parquet AS SELECT 20 AS id;
+
+-- The PATH at CREATE FUNCTION time points at path_a, so unqualified `t` in the body binds to
+-- path_a.t.
+> SET PATH = spark_catalog.path_a, system.builtin;
+> CREATE FUNCTION default.frozen_fn() RETURNS INT
+    RETURN (SELECT MAX(id) FROM t);
+
+-- Flip the live PATH. The function body still resolves `t` against the frozen path.
+> SET PATH = spark_catalog.path_b, system.builtin;
+
+-- A bare query follows the LIVE path:
+> SELECT MAX(id) FROM t;
+ 20
+
+-- The function body follows its FROZEN path:
+> SELECT default.frozen_fn();
+ 10
+
+-- DESCRIBE FUNCTION EXTENDED shows the captured path:
+> DESC FUNCTION EXTENDED default.frozen_fn;
+ Function:    spark_catalog.default.frozen_fn
+ ...
+ SQL Path:    spark_catalog.path_a, system.builtin
+```
+
 ### Related Statements
 
 * [SHOW FUNCTIONS](sql-ref-syntax-aux-show-functions.html)
 * [DESCRIBE FUNCTION](sql-ref-syntax-aux-describe-function.html)
 * [DROP FUNCTION](sql-ref-syntax-ddl-drop-function.html)
+* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html)
+* [Name Resolution](sql-ref-name-resolution.html)
diff --git a/docs/sql-ref-syntax-ddl-create-view.md b/docs/sql-ref-syntax-ddl-create-view.md
index 2d832636b38fc..f6fc6c0e85c75 100644
--- a/docs/sql-ref-syntax-ddl-create-view.md
+++ b/docs/sql-ref-syntax-ddl-create-view.md
@@ -40,9 +40,11 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident
 
 * **[ GLOBAL ] TEMPORARY**
 
-    TEMPORARY views are session-scoped and will be dropped when session ends
-    because it skips persisting the definition in the underlying metastore, if any.
-    GLOBAL TEMPORARY views are tied to a system preserved temporary database `global_temp`.
+    `TEMPORARY` views are session-scoped and are dropped when the session ends;
+    no entry is persisted in the underlying metastore.
+    Temporary views live in the per-session `system.session` namespace.
+
+    `GLOBAL TEMPORARY` views are tied to the system-preserved temporary database `global_temp`.
 
 * **IF NOT EXISTS**
 
@@ -51,9 +53,23 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident
 
 * **view_identifier**
 
-    Specifies a view name, which may be optionally qualified with a database name.
+    Specifies a view name.
+
+    * For a **persistent** view the name may be optionally qualified with a database name (or a
+      catalog and database). If the name is not qualified the view is created in the current
+      schema.
+
+      **Syntax:** `[ catalog_name. ] [ database_name. ] view_name`
 
-    **Syntax:** `[ database_name. ] view_name`
+    * For a **temporary** view the name may be optionally qualified with the session schema
+      (`session` or `system.session`). Any other qualifier is rejected with
+      `INVALID_TEMP_OBJ_QUALIFIER`. For example, `CREATE TEMPORARY VIEW session.v ...` and
+      `CREATE TEMPORARY VIEW system.session.v ...` are accepted; `CREATE TEMPORARY VIEW mydb.v ...`
+      is not.
+
+      **Syntax:** `[ { session | system.session } . ] view_name`
+
+    The fully qualified view name must be unique within its schema.
 
 * **create_view_clauses**
 
@@ -75,8 +91,16 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident
       The default is `WITH SCHEMA COMPENSATION`.
 
 * **query**
+
   A [SELECT](sql-ref-syntax-qry-select.html) statement that constructs the view from base tables or other views.
 
+  A persistent view cannot reference temporary views, temporary functions, or session variables.
+
+  For a persistent view, the SQL Path in effect at `CREATE VIEW` time is captured into the view's
+  metadata; the body resolves against that frozen path on every reference, not the invoker's
+  current path. Use [DESCRIBE EXTENDED](sql-ref-syntax-aux-describe-table.html) to inspect the
+  captured path. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html).
+
 ### Examples
 
 ```sql
@@ -98,8 +122,74 @@ CREATE OR REPLACE VIEW open_orders WITH SCHEMA EVOLUTION
     AS SELECT * FROM orders WHERE status = 'open';
 ```
 
+### Create a temporary view with a session qualifier
+
+```sql
+-- Unqualified, `session`-qualified, and `system.session`-qualified names all create the same
+-- temporary view in the per-session `system.session` namespace.
+CREATE TEMPORARY VIEW recent_orders
+    AS SELECT * FROM orders WHERE order_date > current_date - INTERVAL 7 DAYS;
+
+CREATE OR REPLACE TEMPORARY VIEW session.recent_orders
+    AS SELECT * FROM orders WHERE order_date > current_date - INTERVAL 7 DAYS;
+
+CREATE OR REPLACE TEMPORARY VIEW system.session.recent_orders
+    AS SELECT * FROM orders WHERE order_date > current_date - INTERVAL 7 DAYS;
+
+-- All three names address the same temporary view:
+SELECT count(*) FROM recent_orders;
+SELECT count(*) FROM session.recent_orders;
+SELECT count(*) FROM system.session.recent_orders;
+
+-- DROP VIEW accepts the same qualifiers (there is no DROP TEMPORARY VIEW form):
+DROP VIEW session.recent_orders;
+
+-- Any other qualifier on a TEMPORARY view is rejected.
+CREATE TEMPORARY VIEW mydb.bad_temp AS SELECT 1;
+  [INVALID_TEMP_OBJ_QUALIFIER] qualifier `mydb` is not allowed for temporary VIEW ...
+
+CREATE TEMPORARY VIEW system.builtin.bad_temp AS SELECT 1;
+  [INVALID_TEMP_OBJ_QUALIFIER] qualifier `system`.`builtin` is not allowed for temporary VIEW ...
+```
+
+### Frozen SQL Path
+
+A persistent view captures the SQL Path that is in effect at `CREATE VIEW` time. The view body
+resolves against that frozen path on every reference, even when the caller's session has set a
+different PATH. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html).
+
+```sql
+> CREATE SCHEMA views_a;
+> CREATE SCHEMA views_b;
+> CREATE TABLE views_a.t USING parquet AS SELECT 1 AS id;
+> CREATE TABLE views_b.t USING parquet AS SELECT 2 AS id;
+
+-- The PATH at CREATE VIEW time points at views_a, so unqualified `t` in the view body binds to
+-- views_a.t.
+> SET PATH = spark_catalog.views_a, system.builtin;
+> CREATE VIEW default.v_frozen AS SELECT id FROM t;
+
+-- Flip the live PATH. The view body still resolves `t` against the frozen path.
+> SET PATH = spark_catalog.views_b, system.builtin;
+
+-- A bare query follows the LIVE path:
+> SELECT id FROM t;
+ 2
+
+-- The view body follows its FROZEN path:
+> SELECT id FROM default.v_frozen;
+ 1
+
+-- DESCRIBE EXTENDED shows the captured path:
+> DESCRIBE EXTENDED default.v_frozen;
+ ...
+ SQL Path  spark_catalog.views_a, system.builtin
+```
+
 ### Related Statements
 
 * [ALTER VIEW](sql-ref-syntax-ddl-alter-view.html)
 * [DROP VIEW](sql-ref-syntax-ddl-drop-view.html)
 * [SHOW VIEWS](sql-ref-syntax-aux-show-views.html)
+* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html)
+* [Name Resolution](sql-ref-name-resolution.html)
diff --git a/docs/sql-ref-syntax-ddl-drop-function.md b/docs/sql-ref-syntax-ddl-drop-function.md
index bef31d74afcff..b9272e34b81d6 100644
--- a/docs/sql-ref-syntax-ddl-drop-function.md
+++ b/docs/sql-ref-syntax-ddl-drop-function.md
@@ -34,14 +34,18 @@ DROP [ TEMPORARY ] FUNCTION [ IF EXISTS ] function_name
 
 * **function_name**
 
-    Specifies the name of an existing function. The function name may be
-    optionally qualified with a database name.
+    Specifies the name of an existing function. With `TEMPORARY`, the name may optionally be
+    qualified with `session` or `system.session`. Without `TEMPORARY`, the name may optionally be
+    qualified with a database (or a catalog and database) and resolves to a persistent function.
 
-    **Syntax:** `[ database_name. ] function_name`
+    **Syntax:** `[ catalog_name. ] [ database_name. ] function_name`
+
+    Functions in `system.builtin` cannot be dropped.
 
 * **TEMPORARY**
 
-    Should be used to delete the `TEMPORARY` function.
+    Required to drop a temporary function. Without `TEMPORARY`, `DROP FUNCTION` only considers
+    persistent functions.
 
 * **IF EXISTS**
 
diff --git a/docs/sql-ref-syntax-ddl-drop-view.md b/docs/sql-ref-syntax-ddl-drop-view.md
index 5b680d7f907e0..16f711a9074eb 100644
--- a/docs/sql-ref-syntax-ddl-drop-view.md
+++ b/docs/sql-ref-syntax-ddl-drop-view.md
@@ -37,9 +37,11 @@ DROP VIEW [ IF EXISTS ] view_identifier
 
 * **view_identifier**
 
-    Specifies the view name to be dropped. The view name may be optionally qualified with a database name.
+    Specifies the view name to be dropped. The name may be optionally qualified with a database
+    name (or a catalog and database). A name qualified with `session` or `system.session`
+    targets a temporary view.
 
-    **Syntax:** `[ database_name. ] view_name`
+    **Syntax:** `[ catalog_name. ] [ database_name. ] view_name`
 
 ### Examples
 
@@ -53,12 +55,20 @@ DROP VIEW userdb.employeeView;
 -- Assumes a view named `employeeView` does not exist.
 -- Throws exception
 DROP VIEW employeeView;
-Error: org.apache.spark.sql.AnalysisException: Table or view not found: employeeView;
-(state=,code=0)
+Error: TABLE_OR_VIEW_NOT_FOUND
 
 -- Assumes a view named `employeeView` does not exist,Try with IF EXISTS
 -- this time it will not throw exception
 DROP VIEW IF EXISTS employeeView;
+
+-- A temporary view that shadows a persistent view with the same name.
+-- An unqualified DROP VIEW drops the temporary view first; qualifying with `session`
+-- always targets the temporary view explicitly.
+CREATE VIEW default.recent_orders AS SELECT * FROM orders WHERE order_date > current_date - 7;
+CREATE TEMPORARY VIEW recent_orders AS SELECT * FROM orders WHERE order_date = current_date;
+
+DROP VIEW session.recent_orders;             -- drops the temporary view
+DROP VIEW default.recent_orders;             -- drops the persistent view
 ```
 
 ### Related Statements
diff --git a/docs/sql-ref-syntax-qry-select-join.md b/docs/sql-ref-syntax-qry-select-join.md
index 698884dc28b57..646297831d1cc 100644
--- a/docs/sql-ref-syntax-qry-select-join.md
+++ b/docs/sql-ref-syntax-qry-select-join.md
@@ -26,7 +26,7 @@ A SQL join is used to combine rows from two relations based on join criteria. Th
 ### Syntax
 
 ```sql
-relation { [ join_type ] JOIN [ LATERAL ] relation [ join_criteria ] | NATURAL join_type JOIN [ LATERAL ] relation }
+relation { [ join_type ] JOIN [ LATERAL ] relation [ join_criteria | nearest_by_clause ] | NATURAL join_type JOIN [ LATERAL ] relation }
 ```
 
 ### Parameters
@@ -53,6 +53,30 @@ relation { [ join_type ] JOIN [ LATERAL ] relation [ join_criteria ] | NATURAL j
 
     Specifies an expression with a return type of boolean.
 
+* **nearest_by_clause**
+
+    Specifies a nearest-by top-K ranking join. For each row on the left (query side), returns up to `num_results` rows from the right (base side), ranked by `ranking_expression`. Only `INNER` (the default) and `LEFT OUTER` join types are supported with this clause.
+
+    **Syntax:** `{ APPROX | EXACT } NEAREST [ num_results ] BY { DISTANCE | SIMILARITY } ranking_expression`
+
+    `APPROX | EXACT`
+
+    Controls the search algorithm contract. `APPROX` allows the optimizer to use faster approximate strategies (such as indexed nearest-neighbor search when available). `EXACT` forces brute-force evaluation.
+
+    `num_results`
+
+    A positive integer literal between 1 and 100000 that limits the number of matches per left row. Defaults to 1 when omitted.
+
+    `DISTANCE | SIMILARITY`
+
+    `DISTANCE` ranks rows by smallest value of `ranking_expression` first. `SIMILARITY` ranks rows by largest value first. Matched right-side rows are emitted in best-first order: smallest ranking value first under `DISTANCE`, largest first under `SIMILARITY`. (Downstream operators may reorder; add an explicit `ORDER BY` if you need to lock in the ordering.)
+
+    `ranking_expression`
+
+    A scalar expression that returns an orderable type. The expression is evaluated once per (left, right) pair on the brute-force path, so avoid expensive or side-effecting UDFs in ranking expressions.
+
+    **Performance note.** The current implementation evaluates the full cross-product of the left and right sides and bounds memory per left row by `num_results`. Per-query work is `O(|left| × |right| × log num_results)`. Index-backed approximate strategies (transparent to `APPROX` queries) are planned in a future release; until then, pre-filter the right side (e.g. via a subquery) when it is large.
+
 ### Join Types
 
 #### **Inner Join**
diff --git a/docs/sql-ref-syntax.md b/docs/sql-ref-syntax.md
index d8c37dc021985..1e0ea4a2b8d64 100644
--- a/docs/sql-ref-syntax.md
+++ b/docs/sql-ref-syntax.md
@@ -29,7 +29,8 @@ Data Definition Statements are used to create or modify the structure of databas
  * [ALTER TABLE](sql-ref-syntax-ddl-alter-table.html)
  * [ALTER VIEW](sql-ref-syntax-ddl-alter-view.html)
  * [CREATE DATABASE](sql-ref-syntax-ddl-create-database.html)
- * [CREATE FUNCTION](sql-ref-syntax-ddl-create-function.html)
+ * [CREATE FUNCTION (External)](sql-ref-syntax-ddl-create-function.html)
+ * [CREATE FUNCTION (SQL)](sql-ref-syntax-ddl-create-sql-function.html)
  * [CREATE TABLE](sql-ref-syntax-ddl-create-table.html)
  * [CREATE VIEW](sql-ref-syntax-ddl-create-view.html)
  * [DECLARE VARIABLE](sql-ref-syntax-ddl-declare-variable.html)
@@ -123,6 +124,7 @@ You use SQL scripting to execute procedural logic in SQL.
  * [REFRESH FUNCTION](sql-ref-syntax-aux-cache-refresh-function.html)
  * [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html)
  * [SET](sql-ref-syntax-aux-conf-mgmt-set.html)
+ * [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html)
  * [SET VAR](sql-ref-syntax-aux-set-var.html)
  * [SHOW COLLATIONS](sql-ref-syntax-aux-show-collations.html)
  * [SHOW COLUMNS](sql-ref-syntax-aux-show-columns.html)
diff --git a/docs/web-ui.md b/docs/web-ui.md
index 3889b41f03a04..6ae0a363d1873 100644
--- a/docs/web-ui.md
+++ b/docs/web-ui.md
@@ -28,6 +28,31 @@ to monitor the status and resource consumption of your Spark cluster.
 * This will become a table of contents (this text will be scraped).
 {:toc}
 
+## Overview
+
+The Web UI is built into every Spark application: while the application is
+running, it serves a set of web pages that let you inspect what is happening
+inside it. Typical uses include monitoring a running job, diagnosing a
+failure, analyzing the execution plan of a slow SQL query, and checking how
+memory and tasks are distributed across executors.
+
+By default the Web UI is available at `http://<driver-host>:4040`. When that
+port is already in use (for example, when several Spark applications run on
+the same host), Spark tries `4041`, `4042`, and so on until it finds a free
+port, and logs the chosen port at startup. You can override the default port
+with `spark.ui.port`, and tune other UI behavior through the `spark.ui.*`
+properties documented in the [Configuration](configuration.html#spark-ui)
+reference.
+
+The Web UI is tied to the lifetime of the application: once it exits, the UI
+is no longer reachable. To inspect an application after it has finished,
+enable event logging and run the Spark History Server, which reconstructs an
+equivalent UI from the persisted event log; see
+[Monitoring and Instrumentation](monitoring.html) for setup details.
+
+The remaining sections walk through each tab in the Web UI's top navigation
+bar.
+
 ## Jobs Tab
 The Jobs tab displays a summary page of all jobs in the Spark application and a details page
 for each job. The summary page shows high-level information, such as the status, duration, and
@@ -35,64 +60,33 @@ progress of all jobs and the overall event timeline. When you click on a job on
 page, you see the details page for that job. The details page further shows the event timeline,
 DAG visualization, and all stages of the job.
 
-The information that is displayed in this section is
-* User: Current Spark user
-* Started At: The startup time of Spark application
-* Total uptime: Time since Spark application started
+The information displayed at the top of the page includes:
+
 * Scheduling mode: See [job scheduling](job-scheduling.html#configuring-pool-properties)
 * Number of jobs per status: Active, Completed, Failed
-
-<p style="text-align: center;">
-  <img src="img/AllJobsPageDetail1.png" title="Basic info" alt="Basic info" width="20%"/>
-</p>
-
 * Event timeline: Displays in chronological order the events related to the executors (added, removed) and the jobs
-
-<p style="text-align: center;">
-  <img src="img/AllJobsPageDetail2.png" title="Event timeline" alt="Event timeline"/>
-</p>
-
 * Details of jobs grouped by status: Displays detailed information of the jobs including Job ID, description (with a link to detailed job page), submitted time, duration, stages summary and tasks progress bar
 
+The current user, application start time, and total uptime are shown in the footer at the
+bottom of every page.
+
 <p style="text-align: center;">
-  <img src="img/AllJobsPageDetail3.png" title="Details of jobs grouped by status" alt="Details of jobs grouped by status"/>
+  <img src="img/AllJobsPage.png" title="All Jobs page" alt="All Jobs page" width="100%"/>
 </p>
 
-
-When you click on a specific job, you can see the detailed information of this job.
-
 ### Jobs detail
 
 This page displays the details of a specific job identified by its job ID.
+
 * Job Status: (running, succeeded, failed)
 * Number of stages per status (active, pending, completed, skipped, failed)
-* Associated SQL Query: Link to the sql tab for this job
+* Associated SQL Query: Link to the SQL tab for this job
 * Event timeline: Displays in chronological order the events related to the executors (added, removed) and the stages of the job
+* DAG visualization: Visual representation of the directed acyclic graph of this job where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied on RDD
+* List of stages (grouped by state active, pending, completed, skipped, and failed), with columns including Stage ID, description, submitted timestamp, duration, tasks progress bar, **Input** (bytes read from storage), **Output** (bytes written to storage), **Shuffle read** (total shuffle bytes and records read locally and from remote executors), and **Shuffle write** (bytes and records written to disk for a future shuffle)
 
 <p style="text-align: center;">
-  <img src="img/JobPageDetail1.png" title="Event timeline" alt="Event timeline"/>
-</p>
-
-* DAG visualization: Visual representation of the directed acyclic graph of this job where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied on RDD.
-* An example of DAG visualization for `sc.parallelize(1 to 100).toDF.count()`
-
-<p style="text-align: center;">
-  <img src="img/JobPageDetail2.png" title="DAG" alt="DAG" width="40%">
-</p>
-
-* List of stages (grouped by state active, pending, completed, skipped, and failed)
-    * Stage ID
-    * Description of the stage
-    * Submitted timestamp
-    * Duration of the stage
-    * Tasks progress bar
-    * Input: Bytes read from storage in this stage
-    * Output: Bytes written in storage in this stage
-    * Shuffle read: Total shuffle bytes and records read, includes both data read locally and data read from remote executors
-    * Shuffle write: Bytes and records written to disk in order to be read by a shuffle in a future stage
-
-<p style="text-align: center;">
-  <img src="img/JobPageDetail3.png" title="DAG" alt="DAG">
+  <img src="img/JobPage.png" title="Job detail page" alt="Job detail page" width="100%"/>
 </p>
 
 ## Stages Tab
@@ -100,41 +94,36 @@ This page displays the details of a specific job identified by its job ID.
 The Stages tab displays a summary page that shows the current state of all stages of all jobs in
 the Spark application.
 
-At the beginning of the page is the summary with the count of all stages by status (active, pending, completed, skipped, and failed)
+At the top of the page is a summary with the count of all stages by status (active, pending,
+completed, skipped, and failed). In [Fair scheduling mode](job-scheduling.html#scheduling-within-an-application)
+a table of [pool properties](job-scheduling.html#configuring-pool-properties) is also shown.
 
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail1.png" title="Stages header" alt="Stages header" width="30%">
-</p>
-
-In [Fair scheduling mode](job-scheduling.html#scheduling-within-an-application) there is a table that displays [pools properties](job-scheduling.html#configuring-pool-properties)
+Below the summary are the stages, grouped by status (active, pending, completed, skipped, failed).
+An active stage shows a small **(kill)** link next to its description; clicking it asks Spark
+to cancel that stage. Only failed stages show the failure reason. Click a stage's description
+to open its [Stage detail](#stage-detail) page.
 
 <p style="text-align: center;">
-  <img src="img/AllStagesPageDetail2.png" title="Pool properties" alt="Pool properties">
-</p>
-
-After that are the details of stages per status (active, pending, completed, skipped, failed). In active stages, it's possible to kill the stage with the kill link. Only in failed stages, failure reason is shown. Task detail can be accessed by clicking on the description.
-
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail3.png" title="Stages detail" alt="Stages detail">
+  <img src="img/AllStagesPage.png" title="Stages tab" alt="Stages tab" width="100%">
 </p>
 
 ### Stage detail
-The stage detail page begins with information like total time across all tasks, [Locality level summary](tuning.html#data-locality), [Shuffle Read Size / Records](rdd-programming-guide.html#shuffle-operations) and Associated Job IDs.
 
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail4.png" title="Stage header" alt="Stage header" width="30%">
-</p>
+The stage detail page begins with information like total time across all tasks,
+[Locality level summary](tuning.html#data-locality),
+[Shuffle Read Size / Records](rdd-programming-guide.html#shuffle-operations) and Associated Job IDs.
 
-There is also a visual representation of the directed acyclic graph (DAG) of this stage, where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied.
-Nodes are grouped by operation scope in the DAG visualization and labelled with the operation scope name (BatchScan, WholeStageCodegen, Exchange, etc).
-Notably, Whole Stage Code Generation operations are also annotated with the code generation id. For stages belonging to Spark DataFrame or SQL execution, this allows to cross-reference Stage execution details to the relevant details in the Web-UI SQL Tab page where SQL plan graphs and execution plans are reported.
+It also shows a visual representation of the directed acyclic graph (DAG) of this stage,
+where vertices represent the RDDs or DataFrames and the edges represent an operation to be
+applied. Nodes are grouped by operation scope in the DAG visualization and labelled with the
+operation scope name (`BatchScan`, `WholeStageCodegen`, `Exchange`, etc).
+Notably, whole-stage code generation operations are also annotated with the code generation id.
+For stages belonging to Spark DataFrame or SQL execution, this allows you to cross-reference
+stage execution details to the relevant query in the [SQL Tab](#sql-tab).
 
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail5.png" title="Stage DAG" alt="Stage DAG" width="50%">
-</p>
+Summary metrics for all tasks are represented in a table and in a timeline:
 
-Summary metrics for all task are represented in a table and in a timeline.
-* **[Tasks deserialization time](configuration.html#compression-and-serialization)**
+* **Task deserialization time** is the time spent deserializing the task closure on an executor before it can run.
 * **Duration of tasks**.
 * **GC time** is the total JVM garbage collection time.
 * **Result serialization time** is the time spent serializing the task result on an executor before sending it back to the driver.
@@ -148,26 +137,14 @@ Summary metrics for all task are represented in a table and in a timeline.
 * **Shuffle spill (memory)** is the size of the deserialized form of the shuffled data in memory.
 * **Shuffle spill (disk)** is the size of the serialized form of the data on disk.
 
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail6.png" title="Stages metrics" alt="Stages metrics">
-</p>
-
-Aggregated metrics by executor show the same information aggregated by executor.
-
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail7.png" title="Stages metrics per executor" alt="Stages metrics per executors">
-</p>
-
-**[Accumulators](rdd-programming-guide.html#accumulators)** are a type of shared variables. It provides a mutable variable that can be updated inside of a variety of transformations. It is possible to create accumulators with and without name, but only named accumulators are displayed.
+The same metrics are also shown aggregated by executor.
+**[Accumulators](rdd-programming-guide.html#accumulators)** are shared variables that can be
+updated inside transformations; only named accumulators are displayed here. Finally, a tasks
+table shows the same information broken down per task, with links to executor logs and the task
+attempt number for failures.
 
 <p style="text-align: center;">
-  <img src="img/AllStagesPageDetail8.png" title="Stage accumulator" alt="Stage accumulator">
-</p>
-
-Tasks details basically includes the same information as in the summary section but detailed by task. It also includes links to review the logs and the task attempt number if it fails for any reason. If there are named accumulators, here it is possible to see the accumulator value at the end of each task.
-
-<p style="text-align: center;">
-  <img src="img/AllStagesPageDetail9.png" title="Tasks" alt="Tasks">
+  <img src="img/StagePage.png" title="Stage detail" alt="Stage detail" width="100%">
 </p>
 
 ## Storage Tab
@@ -224,8 +201,11 @@ distribution on the cluster.
 
 
 ## Environment Tab
-The Environment tab displays the values for the different environment and configuration variables,
-including JVM, Spark, and system properties.
+
+The Environment tab is the place to verify that your Spark application is
+running with the configuration you expect. It groups the environment and
+configuration information into a set of sub-tabs along the left side of the
+page; clicking one switches the panel on the right.
 
 <p style="text-align: center;">
   <img src="img/webui-env-tab.png"
@@ -235,47 +215,32 @@ including JVM, Spark, and system properties.
   <!-- Images are downsized intentionally to improve quality on retina displays -->
 </p>
 
-This environment page has five parts. It is a useful place to check whether your properties have
-been set correctly.
-The first part 'Runtime Information' simply contains the [runtime properties](configuration.html#runtime-environment)
-like versions of Java and Scala.
-The second part 'Spark Properties' lists the [application properties](configuration.html#application-properties) like
-['spark.app.name'](configuration.html#application-properties) and 'spark.driver.memory'.
-
-<p style="text-align: center;">
-  <img src="img/webui-env-hadoop.png"
-       title="Hadoop Properties"
-       alt="Hadoop Properties"
-       width="100%" />
-  <!-- Images are downsized intentionally to improve quality on retina displays -->
-</p>
-Clicking the 'Hadoop Properties' link displays properties relative to Hadoop and YARN. Note that properties like
-['spark.hadoop.*'](configuration.html#execution-behavior) are shown not in this part but in 'Spark Properties'.
-
-<p style="text-align: center;">
-  <img src="img/webui-env-sys.png"
-       title="System Properties"
-       alt="System Properties"
-       width="100%" />
-  <!-- Images are downsized intentionally to improve quality on retina displays -->
-</p>
-'System Properties' shows more details about the JVM.
-
-<p style="text-align: center;">
-  <img src="img/webui-env-class.png"
-       title="Classpath Entries"
-       alt="Classpath Entries"
-       width="100%" />
-  <!-- Images are downsized intentionally to improve quality on retina displays -->
-</p>
-
-The last part 'Classpath Entries' lists the classes loaded from different sources, which is very useful
-to resolve class conflicts.
+The sub-tabs are:
+
+* **Runtime Information** &mdash; JVM, Scala, and other
+  [runtime properties](configuration.html#runtime-environment) of the driver.
+* **Spark Properties** &mdash; the effective
+  [application properties](configuration.html#application-properties)
+  (such as `spark.app.name` and `spark.driver.memory`). Note that
+  [`spark.hadoop.*`](configuration.html#execution-behavior) properties are
+  listed here, not under Hadoop Properties.
+* **Resource Profiles** &mdash; CPU, memory, and accelerator resource
+  requests for each [resource profile](configuration.html#stage-level-scheduling-overview)
+  in use.
+* **Hadoop Properties** &mdash; values loaded from Hadoop and YARN configuration
+  files.
+* **System Properties** &mdash; the underlying JVM system properties.
+* **Metrics Properties** &mdash; the configuration loaded for the
+  [metrics system](monitoring.html#metrics).
+* **Classpath Entries** &mdash; the classes loaded into the driver, broken
+  down by source. Handy when tracking down class conflicts.
 
 ## Executors Tab
-The Executors tab displays summary information about the executors that were created for the
-application, including memory and disk usage and task and shuffle information. The Storage Memory
-column shows the amount of memory used and reserved for caching data.
+The Executors tab lists every executor that has been allocated to the
+application, including the driver. Each row shows resource usage (memory,
+disk, cores), storage memory reserved for cached data, task counts, shuffle
+totals, and performance signals such as
+[GC time](tuning.html#garbage-collection-tuning).
 
 <p style="text-align: center;">
   <img src="img/webui-exe-tab.png"
@@ -285,51 +250,28 @@ column shows the amount of memory used and reserved for caching data.
   <!-- Images are downsized intentionally to improve quality on retina displays -->
 </p>
 
-The Executors tab provides not only resource information (amount of memory, disk, and cores used by each executor)
-but also performance information ([GC time](tuning.html#garbage-collection-tuning) and shuffle information).
-
-<p style="text-align: center;">
-  <img src="img/webui-exe-err.png"
-       title="Stderr Log"
-       alt="Stderr Log"
-       width="80%" />
-  <!-- Images are downsized intentionally to improve quality on retina displays -->
-</p>
-
-Clicking the 'stderr' link of executor 0 displays detailed [standard error log](spark-standalone.html#monitoring-and-logging)
-in its console.
-
-<p style="text-align: center;">
-  <img src="img/webui-exe-thread.png"
-       title="Thread Dump"
-       alt="Thread Dump"
-       width="80%" />
-  <!-- Images are downsized intentionally to improve quality on retina displays -->
-</p>
-
-Clicking the 'Thread Dump' link of executor 0 displays the thread dump of JVM on executor 0, which is pretty useful
-for performance analysis.
+Each row carries a set of detail links &mdash; **Thread Dump**, **Heap
+Histogram**, and **Flame Graph** &mdash; that open the corresponding live
+data for that executor in a side panel without leaving the page. The panel
+can be resized by dragging its left edge. The **stderr** and **stdout**
+links open the executor's log files in a new view; the exact location of
+those logs depends on your cluster manager (see
+[Monitoring and Instrumentation](monitoring.html) for details).
 
 ## SQL Tab
-If the application executes Spark SQL queries, the SQL tab displays information, such as the duration,
-jobs, and physical and logical plans for the queries. Here we include a basic example to illustrate
-this tab:
-{% highlight scala %}
-scala> val df = Seq((1, "andy"), (2, "bob"), (2, "andy")).toDF("count", "name")
-df: org.apache.spark.sql.DataFrame = [count: int, name: string]
 
-scala> df.count
-res0: Long = 3
+### Query Listing
 
-scala> df.createGlobalTempView("df")
+The SQL tab lists all SQL and DataFrame queries submitted to the Spark
+application. Any DataFrame action that triggers execution (such as `count`,
+`show`, or `write`) shows up here, not only queries written as SQL strings.
+Here is a short example that produces a few entries:
 
-scala> spark.sql("select name,sum(count) from global_temp.df group by name").show
-+----+----------+
-|name|sum(count)|
-+----+----------+
-|andy|         3|
-| bob|         2|
-+----+----------+
+{% highlight python %}
+df = spark.createDataFrame([(1, "andy"), (2, "bob"), (2, "andy")], ["count", "name"])
+df.count()
+df.createOrReplaceTempView("df")
+spark.sql("SELECT name, SUM(count) FROM df GROUP BY name").show()
 {% endhighlight %}
 
 <p style="text-align: center;">
@@ -340,44 +282,47 @@ scala> spark.sql("select name,sum(count) from global_temp.df group by name").sho
   <!-- Images are downsized intentionally to improve quality on retina displays -->
 </p>
 
-Now the above three dataframe/SQL operators are shown in the list. If we click the
-'show at \<console\>: 24' link of the last query, we will see the DAG and details of the query execution.
-
-<p style="text-align: center;">
-  <img src="img/webui-sql-dag.png"
-       title="SQL DAG"
-       alt="SQL DAG"
-       width="50%" />
-  <!-- Images are downsized intentionally to improve quality on retina displays -->
-</p>
+The listing supports sorting by column, searching, filtering by status,
+and pagination, which makes it easy to locate a specific query in
+long-running applications.
 
-The query details page displays information about the query execution time, its duration,
-the list of associated jobs, and the query execution DAG.
-The first block 'WholeStageCodegen (1)' compiles multiple operators ('LocalTableScan' and 'HashAggregate') together into a single Java
-function to improve performance, and metrics like number of rows and spill size are listed in the block.
-The annotation '(1)' in the block name is the code generation id.
-The second block 'Exchange' shows the metrics on the shuffle exchange, including
-number of written shuffle records, total data size, etc.
+### SQL Plan Visualization
 
+Each query in the listing has a graph view of its operators. Every node
+shows the operator name together with its metrics inline, and the edges
+follow the data flow. You can pan and zoom the graph to navigate large
+plans, search for a node by name, and click any node to open a side panel
+with its full details.
 
 <p style="text-align: center;">
-  <img src="img/webui-sql-plan.png"
-       title="logical plans and the physical plan"
-       alt="logical plans and the physical plan"
+  <img src="img/webui-sql-dag.png"
+       title="SQL plan visualization"
+       alt="SQL plan visualization"
        width="80%" />
   <!-- Images are downsized intentionally to improve quality on retina displays -->
 </p>
-Clicking the 'Details' link on the bottom displays the logical plans and the physical plan, which
-illustrate how Spark parses, analyzes, optimizes and performs the query.
-Steps in the physical plan subject to whole stage code generation optimization, are prefixed by a star followed by
-the code generation id, for example: '*(1) LocalTableScan'
+
+### Execution Detail Page
+
+The execution detail page, opened by clicking the **ID** or **Description**
+link of any row in the query listing, gathers everything recorded for a
+single query. The header lists the
+query's submission time, duration, status, description, and the jobs and
+stages associated with it. The
+[SQL Plan Visualization](#sql-plan-visualization) shows the graph of
+operators. At the bottom of the page, a "Details" link expands the full
+text of the parsed, analyzed, and optimized logical plans together with
+the physical plan, useful when you want to see how Spark transformed your
+query during planning.
 
 ### SQL metrics
 
-The metrics of SQL operators are shown in the block of physical operators. The SQL metrics can be useful
-when we want to dive into the execution details of each operator. For example, "number of output rows"
-can answer how many rows are output after a Filter operator, "shuffle bytes written total" in an Exchange
-operator shows the number of bytes written by a shuffle.
+Each node in the [SQL Plan Visualization](#sql-plan-visualization) carries
+its own metrics inline. These metrics are useful when you want to dive into
+the execution details of each operator. For example, `number of output rows`
+shows how many rows pass through a `Filter` operator, and
+`shuffle bytes written` in an `Exchange` shows how much data the
+shuffle wrote.
 
 Here is the list of SQL metrics:
 
diff --git a/examples/pom.xml b/examples/pom.xml
index 30a728cf8abad..7edcc47c2a2ff 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 7507186480adf..0d7048731b297 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml
index e64023e3b3e36..a308783f8a120 100644
--- a/hadoop-cloud/pom.xml
+++ b/hadoop-cloud/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala b/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala
index 44a521bd636c5..ae4f5660facab 100644
--- a/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala
+++ b/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala
@@ -115,6 +115,17 @@ class PathOutputCommitProtocol(
         // failures. Warn
         logTrace(s"Committer $committer may not be tolerant of task commit failures")
       }
+
+      if (dynamicPartitionOverwrite) {
+        // FileOutputCommitter must be initialized with the staging directory so that task output
+        // lands under stagingDir/_temporary/... and commitJob can later delete the old partition
+        // directories and move staged files to final dest. Without this, the committer writes
+        // directly to the final path and the dynamic-overwrite cleanup in commitJob never sees any
+        // partitionPaths.
+        val ctor =
+          committer.getClass.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
+        committer = ctor.newInstance(stagingDir, context)
+      }
     } else {
       // if required other committers need to be checked for dynamic partition
       // compatibility through a StreamCapabilities probe.
@@ -161,6 +172,11 @@ class PathOutputCommitProtocol(
     }.getOrElse(workDir)
     val file = new Path(parent, getFilename(taskContext, spec))
     logTrace(s"Creating task file $file for dir $dir and spec $spec")
+    if (dynamicPartitionOverwrite && committer.isInstanceOf[FileOutputCommitter]) {
+      assert(dir.isDefined,
+        "The dataset to be written must be partitioned when dynamicPartitionOverwrite is true.")
+      partitionPaths += dir.get
+    }
     file.toString
   }
 
diff --git a/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala b/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala
index ee89adab94fb9..954d9011f3bbe 100644
--- a/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala
+++ b/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala
@@ -30,6 +30,19 @@ import org.apache.spark.internal.io.{FileCommitProtocol, FileNameSpec}
 import org.apache.spark.internal.io.cloud.PathOutputCommitProtocol.{CAPABILITY_DYNAMIC_PARTITIONING, OUTPUTCOMMITTER_FACTORY_SCHEME}
 import org.apache.spark.network.util.JavaUtils
 
+/**
+ * Subclass that exposes the protected `partitionPaths` field so tests can
+ * assert on it without going through the full `commitTask` path (which
+ * requires `SparkEnv`).
+ */
+private class PathOutputCommitProtocolForTest(
+    jobId: String,
+    dest: String,
+    dynamicPartitionOverwrite: Boolean)
+  extends PathOutputCommitProtocol(jobId, dest, dynamicPartitionOverwrite) {
+  def capturedPartitionPaths: Set[String] = partitionPaths.toSet
+}
+
 class CommitterBindingSuite extends SparkFunSuite {
 
   private val jobId = "2007071202143_0101"
@@ -264,5 +277,107 @@ class CommitterBindingSuite extends SparkFunSuite {
       "org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory")
   }
 
-}
+  /**
+   * With dynamicPartitionOverwrite=true and a FileOutputCommitter, newTaskTempFile must route
+   * output through the staging directory (not the final output path) and must record the partition
+   * in partitionPaths so that commitJob can delete the old partition directory and rename the
+   * staged one into place.
+   */
+  test("SPARK-56588: FileOutputCommitter dynamic partition overwrite stages output and tracks " +
+      "partitions") {
+    val jobCommitDir = File.createTempFile("dyn-part-overwrite-staging", "")
+    try {
+      jobCommitDir.delete()
+      val jobUri = jobCommitDir.toURI
+      val path = new Path(jobUri)
+      val job = newJob(path)
+      val conf = job.getConfiguration
+      conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttempt0)
+      conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1)
+      bindToFileOutputCommitterFactory(conf, "file")
+      val tContext = new TaskAttemptContextImpl(conf, taskAttemptId0)
+      val committer = new PathOutputCommitProtocolForTest(jobId, jobUri.toString, true)
+      committer.setupJob(tContext)
+      committer.setupTask(tContext)
+
+      val spec = FileNameSpec("", ".parquet")
+      val partition = "a=1/b=2"
+      val tempPath = committer.newTaskTempFile(tContext, Some(partition), spec)
+
+      // The temp file must be under the staging directory, not the final output path.
+      assert(tempPath.contains(".spark-staging-"),
+        s"Expected temp path under staging dir, got: $tempPath")
+      assert(!tempPath.startsWith(path.toUri.toString.stripSuffix("/") + "/" + partition),
+        s"Temp path must not point directly to the final output location: $tempPath")
+
+      // The partition must have been recorded so commitJob can overwrite it.
+      assert(committer.capturedPartitionPaths === Set(partition),
+        s"Expected partitionPaths = {$partition}, got: ${committer.capturedPartitionPaths}")
+    } finally {
+      jobCommitDir.delete()
+    }
+  }
+
+  /**
+   * A cloud committer that handles dynamic partitioning natively (via StreamCapabilities) must NOT
+   * have its partitions tracked in Spark's partitionPaths set: the committer takes care of
+   * overwriting itself, and the commitJob rename loop must not interfere.
+   */
+  test("SPARK-56588: Cloud committer with dynamic partition support does not track partitions in " +
+      "partitionPaths") {
+    val path = new Path("http://example/data")
+    val job = newJob(path)
+    val conf = job.getConfiguration
+    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttempt0)
+    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1)
+    StubPathOutputCommitterBinding.bindWithDynamicPartitioning(conf, "http")
+    val tContext = new TaskAttemptContextImpl(conf, taskAttemptId0)
+    val committer = new PathOutputCommitProtocolForTest(jobId, path.toUri.toString, true)
+    committer.setupJob(tContext)
+    committer.setupTask(tContext)
+
+    val tempPath = committer.newTaskTempFile(tContext, Some("a=1"), FileNameSpec("", ".parquet"))
+
+    // The temp file must be under the committer's own work dir (path/_temporary),
+    // not written directly to the final output location.
+    val expectedWorkDir = path.toUri.toString.stripSuffix("/") + "/_temporary"
+    assert(tempPath.startsWith(expectedWorkDir),
+      s"Expected temp path under committer work dir ($expectedWorkDir), got: $tempPath")
 
+    assert(committer.capturedPartitionPaths.isEmpty,
+      s"partitionPaths must stay empty for cloud committers that handle " +
+        s"dynamic partition overwrite natively, " +
+        s"got: ${committer.capturedPartitionPaths}")
+  }
+
+  /**
+   * Without dynamicPartitionOverwrite, partitionPaths must remain empty even for
+   * FileOutputCommitter (baseline: existing behaviour must not regress).
+   */
+  test("SPARK-56588: FileOutputCommitter without dynamicPartitionOverwrite does not track " +
+      "partitions") {
+    val jobCommitDir = File.createTempFile("no-dyn-part-overwrite", "")
+    try {
+      jobCommitDir.delete()
+      val jobUri = jobCommitDir.toURI
+      val path = new Path(jobUri)
+      val job = newJob(path)
+      val conf = job.getConfiguration
+      conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttempt0)
+      conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1)
+      bindToFileOutputCommitterFactory(conf, "file")
+      val tContext = new TaskAttemptContextImpl(conf, taskAttemptId0)
+      val committer = new PathOutputCommitProtocolForTest(jobId, jobUri.toString, false)
+      committer.setupJob(tContext)
+      committer.setupTask(tContext)
+
+      committer.newTaskTempFile(tContext, Some("a=1"), FileNameSpec("", ".parquet"))
+
+      assert(committer.capturedPartitionPaths.isEmpty,
+        s"partitionPaths must be empty when dynamicPartitionOverwrite=false, " +
+          s"got: ${committer.capturedPartitionPaths}")
+    } finally {
+      jobCommitDir.delete()
+    }
+  }
+}
diff --git a/launcher/pom.xml b/launcher/pom.xml
index 811bdd47d1873..a3443e4478391 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
index ec3c030723ce3..feab843804228 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
@@ -46,7 +46,7 @@ public class JavaModuleOptions {
       "-Dio.netty.tryReflectionSetAccessible=true",
       "-Dio.netty.allocator.type=pooled",
       "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE",
-      "-Dio.netty.noUnsafe=false",
+      "--sun-misc-unsafe-memory-access=allow",
       "--enable-native-access=ALL-UNNAMED"};
 
     /**
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 9c7f51eb0885f..f4302f99265be 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 80d2866c59e12..e2f4aff537abe 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/pom.xml b/pom.xml
index c5220a470f722..0c0f250073b8a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
   </parent>
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-parent_2.13</artifactId>
-  <version>4.2.0.1-4.3.0-0</version>
+  <version>4.2.0.1-4.3.0-1</version>
   <packaging>pom</packaging>
   <name>Spark Project Parent POM</name>
   <url>https://spark.apache.org/</url>
@@ -217,8 +217,12 @@
     <commons-cli.version>1.11.0</commons-cli.version>
     <bouncycastle.version>1.84</bouncycastle.version>
     <tink.version>1.20.0</tink.version>
+    <!--
+      TODO: Once upgrade datasketches to a version that supports Java 25,
+            SPARK-53327 workaround should be reverted.
+     -->
     <datasketches.version>6.2.0</datasketches.version>
-    <netty.version>4.2.12.Final</netty.version>
+    <netty.version>4.2.13.Final</netty.version>
     <netty-tcnative.version>2.0.76.Final</netty-tcnative.version>
     <icu4j.version>78.3</icu4j.version>
     <junit.version>6.0.3</junit.version>
@@ -338,8 +342,9 @@
       -Dio.netty.tryReflectionSetAccessible=true
       -Dio.netty.allocator.type=pooled
       -Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE
-      -Dio.netty.noUnsafe=false
+      --sun-misc-unsafe-memory-access=allow
       --enable-native-access=ALL-UNNAMED
+      -XX:+EnableDynamicAgentLoading
     </extraJavaTestArgs>
     <mariadb.java.client.version>3.5.7</mariadb.java.client.version>
     <mysql.connector.version>9.6.0</mysql.connector.version>
@@ -3274,6 +3279,11 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-source-plugin</artifactId>
       </plugin>
+      <!-- Scalastyle is intentionally NOT bound to a phase here; activate the
+           `scalastyle` profile (or run `mvn scalastyle:check` explicitly) to
+           run it. Default Maven builds skip scalastyle so that a single
+           violation does not cascade into every Maven-invoked CI job; the
+           dedicated lint job is the single source of truth for style. -->
       <plugin>
         <groupId>org.scalastyle</groupId>
         <artifactId>scalastyle-maven-plugin</artifactId>
@@ -3290,13 +3300,6 @@
           <inputEncoding>${project.build.sourceEncoding}</inputEncoding>
           <outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
         </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>check</goal>
-            </goals>
-          </execution>
-        </executions>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
@@ -3434,6 +3437,32 @@
 
   <profiles>
 
+    <!--
+      Opt-in profile that binds scalastyle:check to the `verify` phase. Used
+      by the dedicated lint job; default Maven builds intentionally skip
+      scalastyle to avoid cascading a style violation into every Maven CI job.
+    -->
+    <profile>
+      <id>scalastyle</id>
+      <build>
+        <pluginManagement>
+          <plugins>
+            <plugin>
+              <groupId>org.scalastyle</groupId>
+              <artifactId>scalastyle-maven-plugin</artifactId>
+              <executions>
+                <execution>
+                  <goals>
+                    <goal>check</goal>
+                  </goals>
+                </execution>
+              </executions>
+            </plugin>
+          </plugins>
+        </pluginManagement>
+      </build>
+    </profile>
+
     <!--
       This profile is enabled automatically by the sbt build. It changes the scope for shaded
       dependencies, since we don't shade it in the artifacts generated by the sbt build.
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index b5434efee090c..78416ae9b5112 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -55,7 +55,17 @@ object MimaExcludes {
     ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.ApplicationAttemptInfo.copy"),
     ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.status.api.v1.ApplicationAttemptInfo$"),
     // [SPARK-56330][CORE] Add TaskInterruptListener to TaskContext for interrupt notifications
-    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.addTaskInterruptListener")
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.addTaskInterruptListener"),
+    // [SPARK-56700][SS] Make DataStreamReader.name public
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.streaming.DataStreamReader.name"),
+    // [SPARK-56395][SQL] Add NEAREST BY top-K ranking join
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.Dataset.nearestByJoin"),
+    // [SPARK-57332][SQL] MySQLDialect no longer overrides the visit methods below; the public
+    // Scala overrides on its private SQL builder are replaced by the inherited protected Java
+    // methods of V2ExpressionSQLBuilder. MySQLDialect is private, so this is not a public API.
+    ProblemFilters.exclude[InaccessibleMethodProblem]("org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder.visitStartsWith"),
+    ProblemFilters.exclude[InaccessibleMethodProblem]("org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder.visitEndsWith"),
+    ProblemFilters.exclude[InaccessibleMethodProblem]("org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder.visitContains")
   )
 
   // Exclude rules for 4.1.x from 4.0.0
@@ -88,7 +98,10 @@ object MimaExcludes {
     ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.artifact.ArtifactManager.cachedBlockIdList"),
 
     // [SPARK-54323][PYTHON] Change the way to access logs to TVF instead of system view
-    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.TableValuedFunction.python_worker_logs")
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.TableValuedFunction.python_worker_logs"),
+
+    // [SPARK-56719][SS] Add DataStreamWriter.name() API for sink evolution
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.streaming.DataStreamWriter.name")
   )
 
   // Default exclude rules
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index dba15dc7bb052..5d3dfb94c36cf 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -125,6 +125,11 @@ object SparkBuild extends PomBuild {
           "The Java version used to build the project is outdated. " +
             s"Please use Java $minimumVersion or later.")
       }
+      if (currentVersionFeature == 25 && currentVersionUpdate < 3) {
+        throw new MessageOnlyException(
+          s"Java 25 requires update 3 or later due to JDK-8377811. " +
+            s"Current version: $currentVersion. Please use Java 25.0.3 or later.")
+      }
     },
     (Compile / compile) := ((Compile / compile) dependsOn checkJavaVersion).value,
     (Test / compile) := ((Test / compile) dependsOn checkJavaVersion).value
@@ -242,19 +247,18 @@ object SparkBuild extends PomBuild {
     Set(file)
   }
 
+  // Defines the standalone `scalaStyleOnCompile` / `scalaStyleOnTest` tasks
+  // invoked by `dev/lint-scala`. Style is intentionally NOT attached to
+  // `(Compile / compile)` -- a violation in one module would otherwise abort
+  // compile for that module and every transitive dependent, cascading style
+  // failures into every job that recompiles those sources (Build modules,
+  // Documentation generation, Java 17/25 Maven build, sparkr, ...). Each
+  // cascaded job then surfaces only a generic "exit code 1" with no file/line.
+  // After decoupling, the dedicated lint job is the single place style
+  // violations surface, with file/line annotations from `dev/scalastyle`.
   def enableScalaStyle: Seq[sbt.Def.Setting[_]] = Seq(
     scalaStyleOnCompile := cachedScalaStyle(Compile).value,
-    scalaStyleOnTest := cachedScalaStyle(Test).value,
-    (scalaStyleOnCompile / logLevel) := Level.Warn,
-    (scalaStyleOnTest / logLevel) := Level.Warn,
-    (Compile / compile) := {
-      scalaStyleOnCompile.value
-      (Compile / compile).value
-    },
-    (Test / compile) := {
-      scalaStyleOnTest.value
-      (Test / compile).value
-    }
+    scalaStyleOnTest := cachedScalaStyle(Test).value
   )
 
   lazy val compilerWarningSettings: Seq[sbt.Def.Setting[_]] = Seq(
@@ -290,18 +294,18 @@ object SparkBuild extends PomBuild {
     }
   )
 
-  val noLintOnCompile = sys.env.contains("NOLINT_ON_COMPILE") &&
-      !sys.env.get("NOLINT_ON_COMPILE").contains("false")
   lazy val sharedSettings = checkJavaVersionSettings ++
                             sparkGenjavadocSettings ++
                             compilerWarningSettings ++
-      (if (noLintOnCompile) Nil else enableScalaStyle) ++ Seq(
+                            enableScalaStyle ++ Seq(
     (Compile / exportJars) := true,
     (Test / exportJars) := false,
     javaHome := sys.env.get("JAVA_HOME")
       .orElse(sys.props.get("java.home"))
       .map(file),
     publishMavenStyle := true,
+    packageDoc / publishArtifact := false,
+    packageSrc / publishArtifact := (if (sys.env.contains("PUBLISH_PACKAGE_SRC")) true else false),
     unidocGenjavadocVersion := "0.19",
 
     // Override SBT's default resolvers:
@@ -412,7 +416,7 @@ object SparkBuild extends PomBuild {
   /* Enable shared settings on all projects */
   (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools))
     .foreach(enable(sharedSettings ++ DependencyOverrides.settings ++
-      ExcludedDependencies.settings ++ (if (noLintOnCompile) Nil else Checkstyle.settings) ++
+      ExcludedDependencies.settings ++ Checkstyle.settings ++
       ExcludeShims.settings))
 
   /* Enable tests settings for all projects except examples, assembly and tools */
@@ -1302,7 +1306,8 @@ object ExcludedDependencies {
       ExclusionRule("org.slf4j", "slf4j-simple"),
       ExclusionRule("javax.servlet", "javax.servlet-api"),
       ExclusionRule("io.netty", "netty-codec-protobuf"),
-      ExclusionRule("io.netty", "netty-codec-marshalling"))
+      ExclusionRule("io.netty", "netty-codec-marshalling"),
+      ExclusionRule("junit", "junit"))
   )
 }
 
@@ -1710,7 +1715,10 @@ object Unidoc {
         "-tag", "todo:X",
         "-tag", "groupname:X",
         "-tag", "inheritdoc",
-        "--ignore-source-errors", "-notree"
+        "--ignore-source-errors", "-notree",
+        "-Xmaxerrs", "0",
+        "-verbose",
+        "-Xdoclint:all", "-Xdoclint:-missing"
       )
     },
 
@@ -1957,8 +1965,9 @@ object TestSettings {
         "-Dio.netty.tryReflectionSetAccessible=true",
         "-Dio.netty.allocator.type=pooled",
         "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE",
-        "-Dio.netty.noUnsafe=false",
-        "--enable-native-access=ALL-UNNAMED").mkString(" ")
+        "--sun-misc-unsafe-memory-access=allow",
+        "--enable-native-access=ALL-UNNAMED",
+        "-XX:+EnableDynamicAgentLoading").mkString(" ")
       s"-Xmx$heapSize -Xss4m -XX:MaxMetaspaceSize=$metaspaceSize -XX:ReservedCodeCacheSize=128m -Dfile.encoding=UTF-8 $extraTestJavaArgs"
         .split(" ").toSeq
     },
diff --git a/python/.gitignore b/python/.gitignore
index 6d95b545fd560..b865a73038ce7 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -7,3 +7,6 @@ build/
 dist/
 ./setup.py
 ./setup.cfg
+# Transient copies made by dev/make-distribution.sh while building the sdists.
+/LICENSE
+/NOTICE
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 45c9dca8b474a..82979a0344c3c 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -21,10 +21,12 @@ recursive-include deps/jars *.jar
 graft deps/bin
 recursive-include deps/sbin spark-config.sh spark-daemon.sh start-history-server.sh stop-history-server.sh
 recursive-include deps/data *.data *.txt
-recursive-include deps/licenses *.txt
+graft deps/licenses
 recursive-include deps/examples *.py
 recursive-include lib *.zip
 include README.md
+include LICENSE
+include NOTICE
 
 # Note that these commands are processed in the order they appear, so keep
 # this exclude at the end.
diff --git a/python/benchmarks/bench_eval_type.py b/python/benchmarks/bench_eval_type.py
index 1674a7e660ebf..f384393864ac9 100644
--- a/python/benchmarks/bench_eval_type.py
+++ b/python/benchmarks/bench_eval_type.py
@@ -200,11 +200,9 @@ class MockDataFactory:
 
     NAMED_TYPE_POOLS: dict[str, list[tuple[Callable, Any]]] = {
         "mixed": MIXED_TYPES,
-        "pure_ints": [
-            (lambda r: pa.array(np.random.randint(0, 1000, r, dtype=np.int64)), IntegerType())
-        ],
-        "pure_floats": [(lambda r: pa.array(np.random.rand(r)), DoubleType())],
-        "pure_strings": [(lambda r: pa.array([f"s{j}" for j in range(r)]), StringType())],
+        "pure_ints": [TYPE_REGISTRY["int"]],
+        "pure_floats": [TYPE_REGISTRY["double"]],
+        "pure_strings": [TYPE_REGISTRY["string"]],
         "pure_ts": [
             (
                 lambda r: pa.array(
@@ -498,21 +496,19 @@ def _cogrouped_map_arrow_left_semi(left, right):
         "few_groups_lg": (50, 50_000, 1, 4),
         "many_groups_sm": (2_000, 500, 1, 4),
         "many_groups_lg": (500, 10_000, 1, 4),
-        "wide_values": (200, 5_000, 1, 20),
+        "wide_cols": (200, 5_000, 1, 20),
         "multi_key": (200, 5_000, 3, 5),
     }
 
-    @staticmethod
-    def _build_scenario(name):
+    @classmethod
+    def _build_scenario(cls, name):
         """Build a cogroup scenario: two DataFrames with the same grouping structure.
 
         Unlike grouped map (which wraps columns in a struct), cogroup batches
         have flat columns: [key_col_0, ..., key_col_k, val_col_0, ..., val_col_v].
         """
         np.random.seed(42)
-        num_groups, rows_per_group, num_key_cols, num_value_cols = (
-            _CogroupedMapArrowBenchMixin._scenario_configs[name]
-        )
+        num_groups, rows_per_group, num_key_cols, num_value_cols = cls._scenario_configs[name]
         n_cols = num_key_cols + num_value_cols
         type_pool = MockDataFactory.MIXED_TYPES[:n_cols]
         while len(type_pool) < n_cols:
@@ -528,22 +524,27 @@ def _build_scenario(name):
         return_type = StructType(schema.fields[num_key_cols:])
         return (cogroups, return_type, num_key_cols, num_value_cols)
 
+    _eval_type = PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF
+    # Each UDF entry: (func, n_args). n_args=2 -> func(left, right);
+    # n_args=3 -> func(key, left, right). The Arrow path has no 3-arg variant,
+    # but the tuple shape is shared with the Pandas sibling so ``_write_scenario``
+    # can be inherited unchanged.
     _udfs = {
-        "identity_udf": _cogrouped_map_arrow_identity,
-        "concat_udf": _cogrouped_map_arrow_concat,
-        "left_semi_udf": _cogrouped_map_arrow_left_semi,
+        "identity_udf": (_cogrouped_map_arrow_identity, 2),
+        "concat_udf": (_cogrouped_map_arrow_concat, 2),
+        "left_semi_udf": (_cogrouped_map_arrow_left_semi, 2),
     }
     params = [list(_scenario_configs), list(_udfs)]
     param_names = ["scenario", "udf"]
 
     def _write_scenario(self, scenario, udf_name, buf):
         groups, schema, num_key_cols, num_value_cols = self._build_scenario(scenario)
-        udf_func = self._udfs[udf_name]
+        udf_func, _ = self._udfs[udf_name]
         left_offsets = MockUDFFactory.make_grouped_arg_offsets(num_key_cols, num_value_cols)
         right_offsets = MockUDFFactory.make_grouped_arg_offsets(num_key_cols, num_value_cols)
         arg_offsets = left_offsets + right_offsets
         MockProtocolWriter.write_worker_input(
-            PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF,
+            self._eval_type,
             lambda b: MockProtocolWriter.write_udf_payload(udf_func, schema, arg_offsets, b),
             lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
             buf,
@@ -563,8 +564,15 @@ class CogroupedMapArrowUDFPeakmemBench(_CogroupedMapArrowBenchMixin, _PeakmemBen
 # ``pandas.DataFrame``. Optional 3-arg variant ``(key, left, right)``.
 
 
-class _CogroupedMapPandasBenchMixin:
-    """Provides _write_scenario for SQL_COGROUPED_MAP_PANDAS_UDF."""
+class _CogroupedMapPandasBenchMixin(_CogroupedMapArrowBenchMixin):
+    """Provides _write_scenario for SQL_COGROUPED_MAP_PANDAS_UDF.
+
+    Inherits ``_build_scenario`` and ``_write_scenario`` from the Arrow
+    sibling; only the eval type, the UDFs, and the per-scenario row counts
+    differ. Adds a 3-arg ``key_identity_udf`` variant the Arrow path lacks
+    (``_write_scenario`` ignores the ``n_args`` slot, so the extra entry is
+    handled by the inherited writer).
+    """
 
     def _cogrouped_map_pandas_identity(left, right):
         """Identity cogroup UDF: returns left DataFrame as-is."""
@@ -592,36 +600,11 @@ def _cogrouped_map_pandas_key_identity(key, left, right):
         "few_groups_lg": (50, 10_000, 1, 4),
         "many_groups_sm": (500, 200, 1, 4),
         "many_groups_lg": (200, 2_000, 1, 4),
-        "wide_values": (100, 1_000, 1, 20),
+        "wide_cols": (100, 1_000, 1, 20),
         "multi_key": (100, 1_000, 3, 5),
     }
 
-    @staticmethod
-    def _build_scenario(name):
-        """Build a cogroup scenario: two DataFrames with the same grouping structure.
-
-        Like cogrouped arrow, batches have flat columns:
-        [key_col_0, ..., key_col_k, val_col_0, ..., val_col_v].
-        """
-        np.random.seed(42)
-        num_groups, rows_per_group, num_key_cols, num_value_cols = (
-            _CogroupedMapPandasBenchMixin._scenario_configs[name]
-        )
-        n_cols = num_key_cols + num_value_cols
-        type_pool = MockDataFactory.MIXED_TYPES[:n_cols]
-        while len(type_pool) < n_cols:
-            type_pool = type_pool + MockDataFactory.MIXED_TYPES[: n_cols - len(type_pool)]
-
-        cogroups, schema = MockDataFactory.make_cogrouped_batches(
-            num_groups=num_groups,
-            num_rows=rows_per_group,
-            num_cols=n_cols,
-            spark_type_pool=type_pool,
-            batch_size=rows_per_group,
-        )
-        return_type = StructType(schema.fields[num_key_cols:])
-        return (cogroups, return_type, num_key_cols, num_value_cols)
-
+    _eval_type = PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF
     # Each UDF entry: (func, n_args). n_args=2 -> func(left, right);
     # n_args=3 -> func(key, left, right).
     _udfs = {
@@ -633,19 +616,6 @@ def _build_scenario(name):
     params = [list(_scenario_configs), list(_udfs)]
     param_names = ["scenario", "udf"]
 
-    def _write_scenario(self, scenario, udf_name, buf):
-        groups, schema, num_key_cols, num_value_cols = self._build_scenario(scenario)
-        udf_func, _ = self._udfs[udf_name]
-        left_offsets = MockUDFFactory.make_grouped_arg_offsets(num_key_cols, num_value_cols)
-        right_offsets = MockUDFFactory.make_grouped_arg_offsets(num_key_cols, num_value_cols)
-        arg_offsets = left_offsets + right_offsets
-        MockProtocolWriter.write_worker_input(
-            PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF,
-            lambda b: MockProtocolWriter.write_udf_payload(udf_func, schema, arg_offsets, b),
-            lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
-            buf,
-        )
-
 
 class CogroupedMapPandasUDFTimeBench(_CogroupedMapPandasBenchMixin, _TimeBenchBase):
     pass
@@ -682,11 +652,11 @@ def _grouped_agg_arrow_mean_multi(col0, col1):
         "wide_cols": (200, 5_000, 20),
     }
 
-    @staticmethod
-    def _build_scenario(name):
+    @classmethod
+    def _build_scenario(cls, name):
         """Build a single scenario by name."""
         np.random.seed(42)
-        num_groups, rows_per_group, n_cols = _GroupedAggArrowBenchMixin._scenario_configs[name]
+        num_groups, rows_per_group, n_cols = cls._scenario_configs[name]
         return MockDataFactory.make_grouped_batches(
             num_groups=num_groups,
             num_rows=rows_per_group,
@@ -695,6 +665,7 @@ def _build_scenario(name):
             batch_size=rows_per_group,
         )
 
+    _eval_type = PythonEvalType.SQL_GROUPED_AGG_ARROW_UDF
     _udfs = {
         "sum_udf": _grouped_agg_arrow_sum,
         "mean_multi_udf": _grouped_agg_arrow_mean_multi,
@@ -718,7 +689,7 @@ def write_udf(b):
             MockProtocolWriter.write_udf_payload(udf_func, return_type, arg_offsets, b)
 
         MockProtocolWriter.write_worker_input(
-            PythonEvalType.SQL_GROUPED_AGG_ARROW_UDF,
+            self._eval_type,
             write_udf,
             lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
             buf,
@@ -754,6 +725,7 @@ def _grouped_agg_arrow_iter_mean_multi(batch_iter):
             total += (pc.mean(col0).as_py() or 0) + (pc.mean(col1).as_py() or 0)
         return total
 
+    _eval_type = PythonEvalType.SQL_GROUPED_AGG_ARROW_ITER_UDF
     _udfs = {
         "sum_udf": _grouped_agg_arrow_iter_sum,
         "mean_multi_udf": _grouped_agg_arrow_iter_mean_multi,
@@ -761,28 +733,6 @@ def _grouped_agg_arrow_iter_mean_multi(batch_iter):
     params = [list(_GroupedAggArrowBenchMixin._scenario_configs), list(_udfs)]
     param_names = ["scenario", "udf"]
 
-    def _write_scenario(self, scenario, udf_name, buf):
-        groups, _schema = self._build_scenario(scenario)
-        udf_func = self._udfs[udf_name]
-
-        # sum_udf uses 1 arg, mean_multi_udf uses 2 args
-        if "multi" in udf_name:
-            arg_offsets = [0, 1]
-        else:
-            arg_offsets = [0]
-
-        return_type = DoubleType()
-
-        def write_udf(b):
-            MockProtocolWriter.write_udf_payload(udf_func, return_type, arg_offsets, b)
-
-        MockProtocolWriter.write_worker_input(
-            PythonEvalType.SQL_GROUPED_AGG_ARROW_ITER_UDF,
-            write_udf,
-            lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
-            buf,
-        )
-
 
 class GroupedAggArrowIterUDFTimeBench(_GroupedAggArrowIterBenchMixin, _TimeBenchBase):
     pass
@@ -796,8 +746,15 @@ class GroupedAggArrowIterUDFPeakmemBench(_GroupedAggArrowIterBenchMixin, _Peakme
 # UDF receives ``pd.Series`` columns per group, returns scalar.
 
 
-class _GroupedAggPandasBenchMixin:
-    """Provides _write_scenario for SQL_GROUPED_AGG_PANDAS_UDF."""
+class _GroupedAggPandasBenchMixin(_GroupedAggArrowBenchMixin):
+    """Provides _write_scenario for SQL_GROUPED_AGG_PANDAS_UDF.
+
+    Inherits ``_build_scenario`` and ``_write_scenario`` from the Arrow
+    sibling; only the eval type and the UDFs differ. ``_scenario_configs``
+    is intentionally identical to the Arrow variant for apples-to-apples
+    comparison (the aggregations are cheap enough that pandas conversion
+    is not the bottleneck here).
+    """
 
     def _grouped_agg_pandas_sum(col):
         """Sum a single Pandas Series."""
@@ -807,56 +764,14 @@ def _grouped_agg_pandas_mean_multi(col0, col1):
         """Mean of two Pandas Series combined."""
         return (col0.mean() or 0) + (col1.mean() or 0)
 
-    _scenario_configs = {
-        "few_groups_sm": (50, 5_000, 5),
-        "few_groups_lg": (50, 50_000, 5),
-        "many_groups_sm": (2_000, 500, 5),
-        "many_groups_lg": (500, 10_000, 5),
-        "wide_cols": (200, 5_000, 20),
-    }
-
-    @staticmethod
-    def _build_scenario(name):
-        """Build a single scenario by name."""
-        np.random.seed(42)
-        num_groups, rows_per_group, n_cols = _GroupedAggPandasBenchMixin._scenario_configs[name]
-        return MockDataFactory.make_grouped_batches(
-            num_groups=num_groups,
-            num_rows=rows_per_group,
-            num_cols=n_cols,
-            spark_type_pool=MockDataFactory.NUMERIC_TYPES,
-            batch_size=rows_per_group,
-        )
-
+    _eval_type = PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF
     _udfs = {
         "sum_udf": _grouped_agg_pandas_sum,
         "mean_multi_udf": _grouped_agg_pandas_mean_multi,
     }
-    params = [list(_scenario_configs), list(_udfs)]
+    params = [list(_GroupedAggArrowBenchMixin._scenario_configs), list(_udfs)]
     param_names = ["scenario", "udf"]
 
-    def _write_scenario(self, scenario, udf_name, buf):
-        groups, _schema = self._build_scenario(scenario)
-        udf_func = self._udfs[udf_name]
-
-        # sum_udf uses 1 arg, mean_multi_udf uses 2 args
-        if "multi" in udf_name:
-            arg_offsets = [0, 1]
-        else:
-            arg_offsets = [0]
-
-        return_type = DoubleType()
-
-        def write_udf(b):
-            MockProtocolWriter.write_udf_payload(udf_func, return_type, arg_offsets, b)
-
-        MockProtocolWriter.write_worker_input(
-            PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF,
-            write_udf,
-            lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
-            buf,
-        )
-
 
 class GroupedAggPandasUDFTimeBench(_GroupedAggPandasBenchMixin, _TimeBenchBase):
     pass
@@ -892,7 +807,7 @@ def _grouped_map_arrow_filter(table):
         "few_groups_lg": (50, 50_000, 1, 4),
         "many_groups_sm": (2_000, 500, 1, 4),
         "many_groups_lg": (500, 10_000, 1, 4),
-        "wide_values": (200, 5_000, 1, 20),
+        "wide_cols": (200, 5_000, 1, 20),
         "multi_key": (200, 5_000, 3, 5),
     }
 
@@ -1081,6 +996,55 @@ class GroupedMapPandasUDFPeakmemBench(_GroupedMapPandasBenchMixin, _PeakmemBench
     pass
 
 
+# -- SQL_GROUPED_MAP_PANDAS_ITER_UDF -------------------------------------------
+# UDF receives ``Iterator[pandas.DataFrame]`` per group, returns
+# ``Iterator[pandas.DataFrame]``.
+
+
+class _GroupedMapPandasIterBenchMixin(_GroupedMapPandasBenchMixin):
+    """Provides ``_write_scenario`` for SQL_GROUPED_MAP_PANDAS_ITER_UDF."""
+
+    def _grouped_map_pandas_iter_identity(pdfs):
+        yield from pdfs
+
+    def _grouped_map_pandas_iter_sort(pdfs):
+        for pdf in pdfs:
+            yield pdf.sort_values(pdf.columns[0])
+
+    def _grouped_map_pandas_iter_key_identity(key, pdfs):
+        yield from pdfs
+
+    _udfs = {
+        "identity_udf": (_grouped_map_pandas_iter_identity, None, 1),
+        "sort_udf": (_grouped_map_pandas_iter_sort, None, 1),
+        "key_identity_udf": (_grouped_map_pandas_iter_key_identity, None, 2),
+    }
+    params = [list(_GroupedMapPandasBenchMixin._scenario_configs), list(_udfs)]
+    param_names = ["scenario", "udf"]
+
+    def _write_scenario(self, scenario, udf_name, buf):
+        groups, schema = self._build_scenario(scenario)
+        udf_func, ret_type, n_args = self._udfs[udf_name]
+        if ret_type is None:
+            ret_type = StructType(schema.fields[n_args - 1 :]) if n_args > 1 else schema
+        n_cols = len(schema.fields)
+        arg_offsets = MockUDFFactory.make_grouped_arg_offsets(n_args - 1, n_cols - (n_args - 1))
+        MockProtocolWriter.write_worker_input(
+            PythonEvalType.SQL_GROUPED_MAP_PANDAS_ITER_UDF,
+            lambda b: MockProtocolWriter.write_udf_payload(udf_func, ret_type, arg_offsets, b),
+            lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
+            buf,
+        )
+
+
+class GroupedMapPandasIterUDFTimeBench(_GroupedMapPandasIterBenchMixin, _TimeBenchBase):
+    pass
+
+
+class GroupedMapPandasIterUDFPeakmemBench(_GroupedMapPandasIterBenchMixin, _PeakmemBenchBase):
+    pass
+
+
 # -- SQL_MAP_ARROW_ITER_UDF ------------------------------------------------
 # UDF receives ``Iterator[pa.RecordBatch]``, returns ``Iterator[pa.RecordBatch]``.
 
@@ -1150,7 +1114,10 @@ def _write_scenario(self, scenario, udf_name, buf):
         batches, schema = self._build_scenario(scenario)
         udf_func, ret_type, arg_offsets = self._udfs[udf_name]
         if ret_type is None:
-            ret_type = schema.fields[0].dataType.fields[0].dataType
+            # mapInArrow UDFs return an Iterator[pa.RecordBatch] with the same
+            # schema as the input row (the inner struct, since make_batches
+            # wraps the row schema in a single struct column for the wire).
+            ret_type = schema.fields[0].dataType
         MockProtocolWriter.write_worker_input(
             PythonEvalType.SQL_MAP_ARROW_ITER_UDF,
             lambda b: MockProtocolWriter.write_udf_payload(udf_func, ret_type, arg_offsets, b),
@@ -1167,6 +1134,89 @@ class MapArrowIterUDFPeakmemBench(_MapArrowIterBenchMixin, _PeakmemBenchBase):
     pass
 
 
+# -- SQL_MAP_PANDAS_ITER_UDF -------------------------------------------------
+# UDF receives ``Iterator[pandas.DataFrame]``, returns ``Iterator[pandas.DataFrame]``.
+
+
+class _MapPandasIterBenchMixin:
+    """Provides ``_write_scenario`` for SQL_MAP_PANDAS_ITER_UDF.
+
+    Wraps input batches in a struct column to match the JVM-side wire format
+    (``MapInBatchEvaluatorFactory`` wraps each row in another row, and the
+    Pandas serializer turns that struct back into a ``pandas.DataFrame``
+    when ``df_for_struct=True``).
+    """
+
+    def _identity_pdf_iter(it):
+        yield from it
+
+    def _sort_pdf_iter(it):
+        for pdf in it:
+            yield pdf.sort_values(pdf.columns[0]).reset_index(drop=True)
+
+    def _filter_pdf_iter(it):
+        for pdf in it:
+            yield pdf[pdf[pdf.columns[0]].notna()]
+
+    # Scaled down vs SQL_MAP_ARROW_ITER_UDF: pandas conversion adds
+    # per-batch Arrow<->Pandas overhead across all columns.
+    _scenario_configs = {
+        "sm_batch_few_col": ("mixed", 100_000, 5, 1_000),
+        "sm_batch_many_col": ("mixed", 10_000, 50, 1_000),
+        "lg_batch_few_col": ("mixed", 1_000_000, 5, 10_000),
+        "lg_batch_many_col": ("mixed", 100_000, 50, 10_000),
+        "pure_ints": ("pure_ints", 200_000, 10, 5_000),
+        "pure_floats": ("pure_floats", 200_000, 10, 5_000),
+        "pure_strings": ("pure_strings", 200_000, 10, 5_000),
+        "pure_ts": ("pure_ts", 200_000, 10, 5_000),
+        "mixed_types": ("mixed", 200_000, 10, 5_000),
+    }
+
+    @staticmethod
+    def _build_scenario(name):
+        """Build a single scenario by name."""
+        np.random.seed(42)
+        type_key, num_rows, num_cols, batch_size = _MapPandasIterBenchMixin._scenario_configs[name]
+        pool = MockDataFactory.NAMED_TYPE_POOLS[type_key]
+        struct_type = MockDataFactory.make_struct_type(
+            num_fields=num_cols,
+            base_types=pool,
+        )
+        return MockDataFactory.make_batches(
+            num_rows=num_rows,
+            num_cols=1,
+            spark_type_pool=[struct_type],
+            batch_size=batch_size,
+        )
+
+    _udfs = {
+        "identity_udf": (_identity_pdf_iter, [0]),
+        "sort_udf": (_sort_pdf_iter, [0]),
+        "filter_udf": (_filter_pdf_iter, [0]),
+    }
+    params = [list(_scenario_configs), list(_udfs)]
+    param_names = ["scenario", "udf"]
+
+    def _write_scenario(self, scenario, udf_name, buf):
+        batches, schema = self._build_scenario(scenario)
+        udf_func, arg_offsets = self._udfs[udf_name]
+        ret_type = schema.fields[0].dataType
+        MockProtocolWriter.write_worker_input(
+            PythonEvalType.SQL_MAP_PANDAS_ITER_UDF,
+            lambda b: MockProtocolWriter.write_udf_payload(udf_func, ret_type, arg_offsets, b),
+            lambda b: MockProtocolWriter.write_data_payload(iter(batches), b),
+            buf,
+        )
+
+
+class MapPandasIterUDFTimeBench(_MapPandasIterBenchMixin, _TimeBenchBase):
+    pass
+
+
+class MapPandasIterUDFPeakmemBench(_MapPandasIterBenchMixin, _PeakmemBenchBase):
+    pass
+
+
 # -- SQL_SCALAR_ARROW_UDF ---------------------------------------------------
 # UDF receives ``pa.Array`` columns, returns ``pa.Array``.
 
@@ -1196,11 +1246,11 @@ def _nullcheck_arrow(c):
         "mixed_types": ("mixed", 5_000_000, 10, 5_000),
     }
 
-    @staticmethod
-    def _build_scenario(name):
+    @classmethod
+    def _build_scenario(cls, name):
         """Build a single scenario by name."""
         np.random.seed(42)
-        type_key, num_rows, num_cols, batch_size = _ScalarArrowBenchMixin._scenario_configs[name]
+        type_key, num_rows, num_cols, batch_size = cls._scenario_configs[name]
         pool = MockDataFactory.NAMED_TYPE_POOLS[type_key]
         return MockDataFactory.make_batches(
             num_rows=num_rows,
@@ -1285,8 +1335,13 @@ class ScalarArrowIterUDFPeakmemBench(_ScalarArrowIterBenchMixin, _PeakmemBenchBa
 # Measures the full Arrow-to-Pandas-to-Arrow round-trip.
 
 
-class _ScalarPandasBenchMixin:
-    """Mixin for SQL_SCALAR_PANDAS_UDF benchmarks."""
+class _ScalarPandasBenchMixin(_ScalarArrowBenchMixin):
+    """Mixin for SQL_SCALAR_PANDAS_UDF benchmarks.
+
+    Inherits ``_build_scenario`` and ``_write_scenario`` from the Arrow
+    sibling; only the eval type, the UDFs, and the per-scenario row counts
+    differ (pandas conversion is more expensive, so smaller batches).
+    """
 
     def _scalar_pandas_sort(s):
         return s.sort_values().reset_index(drop=True)
@@ -1306,19 +1361,6 @@ def _scalar_pandas_nullcheck(s):
         "mixed_types": ("mixed", 1_000_000, 10, 5_000),
     }
 
-    @staticmethod
-    def _build_scenario(name):
-        """Build a single scenario by name."""
-        np.random.seed(42)
-        type_key, num_rows, num_cols, batch_size = _ScalarPandasBenchMixin._scenario_configs[name]
-        pool = MockDataFactory.NAMED_TYPE_POOLS[type_key]
-        return MockDataFactory.make_batches(
-            num_rows=num_rows,
-            num_cols=num_cols,
-            spark_type_pool=pool,
-            batch_size=batch_size,
-        )
-
     _eval_type = PythonEvalType.SQL_SCALAR_PANDAS_UDF
     # ret_type=None means "use schema.fields[0].dataType from the scenario"
     _udfs = {
@@ -1329,18 +1371,6 @@ def _build_scenario(name):
     params = [list(_scenario_configs), list(_udfs)]
     param_names = ["scenario", "udf"]
 
-    def _write_scenario(self, scenario, udf_name, buf):
-        batches, schema = self._build_scenario(scenario)
-        udf_func, ret_type, arg_offsets = self._udfs[udf_name]
-        if ret_type is None:
-            ret_type = schema.fields[0].dataType
-        MockProtocolWriter.write_worker_input(
-            self._eval_type,
-            lambda b: MockProtocolWriter.write_udf_payload(udf_func, ret_type, arg_offsets, b),
-            lambda b: MockProtocolWriter.write_data_payload(iter(batches), b),
-            buf,
-        )
-
 
 class ScalarPandasUDFTimeBench(_ScalarPandasBenchMixin, _TimeBenchBase):
     pass
@@ -1405,41 +1435,37 @@ def _window_agg_arrow_mean_multi(col0, col1):
 
         return (pc.mean(col0).as_py() or 0) + (pc.mean(col1).as_py() or 0)
 
-    def _build_scenarios():
-        """Build scenarios for SQL_WINDOW_AGG_ARROW_UDF.
-
-        Returns a dict mapping scenario name to ``(groups, schema)``.
-        """
-        scenarios = {}
-
-        for name, (num_groups, rows_per_group, n_cols) in {
-            "few_groups_sm": (50, 5_000, 5),
-            "few_groups_lg": (50, 50_000, 5),
-            "many_groups_sm": (2_000, 500, 5),
-            "many_groups_lg": (500, 10_000, 5),
-            "wide_cols": (200, 5_000, 20),
-        }.items():
-            groups, schema = MockDataFactory.make_grouped_batches(
-                num_groups=num_groups,
-                num_rows=rows_per_group,
-                num_cols=n_cols,
-                spark_type_pool=MockDataFactory.NUMERIC_TYPES,
-                batch_size=rows_per_group,
-            )
-            scenarios[name] = (groups, schema)
+    _scenario_configs = {
+        "few_groups_sm": (50, 5_000, 5),
+        "few_groups_lg": (50, 50_000, 5),
+        "many_groups_sm": (2_000, 500, 5),
+        "many_groups_lg": (500, 10_000, 5),
+        "wide_cols": (200, 5_000, 20),
+    }
 
-        return scenarios
+    @classmethod
+    def _build_scenario(cls, name):
+        """Build a single scenario by name."""
+        np.random.seed(42)
+        num_groups, rows_per_group, n_cols = cls._scenario_configs[name]
+        return MockDataFactory.make_grouped_batches(
+            num_groups=num_groups,
+            num_rows=rows_per_group,
+            num_cols=n_cols,
+            spark_type_pool=MockDataFactory.NUMERIC_TYPES,
+            batch_size=rows_per_group,
+        )
 
-    _scenarios = _build_scenarios()
+    _eval_type = PythonEvalType.SQL_WINDOW_AGG_ARROW_UDF
     _udfs = {
         "sum_udf": _window_agg_arrow_sum,
         "mean_multi_udf": _window_agg_arrow_mean_multi,
     }
-    params = [list(_scenarios), list(_udfs)]
+    params = [list(_scenario_configs), list(_udfs)]
     param_names = ["scenario", "udf"]
 
     def _write_scenario(self, scenario, udf_name, buf):
-        groups, _schema = self._scenarios[scenario]
+        groups, _schema = self._build_scenario(scenario)
         udf_func = self._udfs[udf_name]
 
         # sum_udf uses 1 arg, mean_multi_udf uses 2 args
@@ -1454,7 +1480,7 @@ def write_udf(b):
             MockProtocolWriter.write_udf_payload(udf_func, return_type, arg_offsets, b)
 
         MockProtocolWriter.write_worker_input(
-            PythonEvalType.SQL_WINDOW_AGG_ARROW_UDF,
+            self._eval_type,
             write_udf,
             lambda b: MockProtocolWriter.write_grouped_data_payload(groups, buf=b),
             buf,
@@ -1468,3 +1494,42 @@ class WindowAggArrowUDFTimeBench(_WindowAggArrowBenchMixin, _TimeBenchBase):
 
 class WindowAggArrowUDFPeakmemBench(_WindowAggArrowBenchMixin, _PeakmemBenchBase):
     pass
+
+
+# -- SQL_WINDOW_AGG_PANDAS_UDF -----------------------------------------------
+# UDF receives ``pd.Series`` columns for the entire window partition, returns scalar.
+
+
+class _WindowAggPandasBenchMixin(_WindowAggArrowBenchMixin):
+    """Provides _write_scenario for SQL_WINDOW_AGG_PANDAS_UDF.
+
+    Inherits ``_build_scenario`` and ``_write_scenario`` from the Arrow
+    sibling; only the eval type and the UDFs differ. ``_scenario_configs``
+    is intentionally identical to the Arrow variant for apples-to-apples
+    comparison (the aggregations are cheap enough that pandas conversion
+    is not the bottleneck here).
+    """
+
+    def _window_agg_pandas_sum(col):
+        """Sum a single Pandas Series."""
+        return col.sum()
+
+    def _window_agg_pandas_mean_multi(col0, col1):
+        """Mean of two Pandas Series combined."""
+        return (col0.mean() or 0) + (col1.mean() or 0)
+
+    _eval_type = PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF
+    _udfs = {
+        "sum_udf": _window_agg_pandas_sum,
+        "mean_multi_udf": _window_agg_pandas_mean_multi,
+    }
+    params = [list(_WindowAggArrowBenchMixin._scenario_configs), list(_udfs)]
+    param_names = ["scenario", "udf"]
+
+
+class WindowAggPandasUDFTimeBench(_WindowAggPandasBenchMixin, _TimeBenchBase):
+    pass
+
+
+class WindowAggPandasUDFPeakmemBench(_WindowAggPandasBenchMixin, _PeakmemBenchBase):
+    pass
diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst b/python/docs/source/reference/pyspark.sql/dataframe.rst
index 9652eb7c42758..e611004356646 100644
--- a/python/docs/source/reference/pyspark.sql/dataframe.rst
+++ b/python/docs/source/reference/pyspark.sql/dataframe.rst
@@ -84,6 +84,7 @@ DataFrame
     DataFrame.metadataColumn
     DataFrame.melt
     DataFrame.na
+    DataFrame.nearestByJoin
     DataFrame.observe
     DataFrame.offset
     DataFrame.orderBy
@@ -140,6 +141,7 @@ DataFrame
     DataFrame.writeTo
     DataFrame.mergeInto
     DataFrame.pandas_api
+    DataFrame.zipWithIndex
     DataFrameNaFunctions.drop
     DataFrameNaFunctions.fill
     DataFrameNaFunctions.replace
diff --git a/python/docs/source/reference/pyspark.sql/datasource.rst b/python/docs/source/reference/pyspark.sql/datasource.rst
index 453875de93360..bb52ef26d94f7 100644
--- a/python/docs/source/reference/pyspark.sql/datasource.rst
+++ b/python/docs/source/reference/pyspark.sql/datasource.rst
@@ -35,10 +35,12 @@ Python Data Source
     DataSourceReader.read
     DataSourceRegistration.register
     DataSourceStreamReader.commit
+    DataSourceStreamReader.getDefaultReadLimit
     DataSourceStreamReader.initialOffset
     DataSourceStreamReader.latestOffset
     DataSourceStreamReader.partitions
     DataSourceStreamReader.read
+    DataSourceStreamReader.reportLatestOffset
     DataSourceStreamReader.stop
     DataSourceWriter.abort
     DataSourceWriter.commit
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index c00d75d7f837f..bf9e055d128e1 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -299,7 +299,14 @@ Date and Timestamp Functions
     timestamp_micros
     timestamp_millis
     timestamp_seconds
+    time_bucket
     time_diff
+    time_from_micros
+    time_from_millis
+    time_from_seconds
+    time_to_micros
+    time_to_millis
+    time_to_seconds
     time_trunc
     to_date
     to_time
@@ -585,6 +592,7 @@ VARIANT Functions
 .. autosummary::
     :toctree: api/
 
+    is_valid_variant
     is_variant_null
     parse_json
     schema_of_variant
diff --git a/python/docs/source/reference/pyspark.sql/io.rst b/python/docs/source/reference/pyspark.sql/io.rst
index 0554e4bea89da..3aafb95713148 100644
--- a/python/docs/source/reference/pyspark.sql/io.rst
+++ b/python/docs/source/reference/pyspark.sql/io.rst
@@ -24,6 +24,7 @@ Input/Output
 .. autosummary::
     :toctree: api/
 
+    DataFrameReader.changes
     DataFrameReader.csv
     DataFrameReader.format
     DataFrameReader.jdbc
diff --git a/python/docs/source/reference/pyspark.ss/io.rst b/python/docs/source/reference/pyspark.ss/io.rst
index 7a20777fdc7c8..38e15cb23f897 100644
--- a/python/docs/source/reference/pyspark.ss/io.rst
+++ b/python/docs/source/reference/pyspark.ss/io.rst
@@ -25,10 +25,12 @@ Input/Output
 .. autosummary::
     :toctree: api/
 
+    DataStreamReader.changes
     DataStreamReader.csv
     DataStreamReader.format
     DataStreamReader.json
     DataStreamReader.load
+    DataStreamReader.name
     DataStreamReader.option
     DataStreamReader.options
     DataStreamReader.orc
diff --git a/python/mypy.ini b/python/mypy.ini
index 7e004e84f2d21..5baa77c370c51 100644
--- a/python/mypy.ini
+++ b/python/mypy.ini
@@ -135,6 +135,12 @@ ignore_missing_imports = True
 [mypy-pyarrow.*]
 ignore_missing_imports = True
 
+; TODO(ARROW-48970): Remove follow_imports once PyArrow ships complete type stubs
+; for pyarrow.compute. Currently its functions are dynamically generated and
+; invisible to mypy since PyArrow 24 added py.typed.
+[mypy-pyarrow.compute]
+follow_imports = skip
+
 [mypy-psutil.*]
 ignore_missing_imports = True
 
diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py
index 911c50141e43f..122c4ca6d4b00 100755
--- a/python/packaging/classic/setup.py
+++ b/python/packaging/classic/setup.py
@@ -104,7 +104,14 @@
 SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
 USER_SCRIPTS_PATH = os.path.join(SPARK_HOME, "sbin")
 DATA_PATH = os.path.join(SPARK_HOME, "data")
-LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")
+# The classic PySpark package bundles the assembly jars, so it ships the binary
+# license texts (licenses-binary), which enumerate those jars' licenses, mirroring
+# the binary distribution. The connect/client packages bundle no jars.
+LICENSES_PATH = os.path.join(SPARK_HOME, "licenses-binary")
+if not os.path.isdir(LICENSES_PATH):
+    # In a binary release dist (the RELEASE mode below), the binary license texts
+    # were already copied to licenses/ (see dev/make-distribution.sh).
+    LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")
 
 SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
 USER_SCRIPTS_TARGET = os.path.join(TEMP_PATH, "sbin")
@@ -341,7 +348,7 @@ def run(self):
             ],
             "pyspark.python.lib": ["*.zip"],
             "pyspark.data": ["*.txt", "*.data"],
-            "pyspark.licenses": ["*.txt"],
+            "pyspark.licenses": ["*"],
             "pyspark.examples.src.main.python": ["*.py", "*/*.py"],
         },
         scripts=scripts,
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index cd2eea5258c84..fcfa347092ee2 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -17,6 +17,7 @@
 
 import os
 import sys
+import hmac
 import select
 import struct
 import socketserver
@@ -60,6 +61,7 @@ def _deserialize_accumulator(
 
 class SpecialAccumulatorIds:
     SQL_UDF_PROFIER = -1
+    SQL_UDF_PROFIER_V2 = -2
 
 
 class Accumulator(Generic[T]):
@@ -298,7 +300,8 @@ def accum_updates() -> bool:
             num_updates = read_int(self.rfile)
             for _ in range(num_updates):
                 aid, update = pickleSer._read_with_length(self.rfile)
-                _accumulatorRegistry[aid] += update
+                if aid in _accumulatorRegistry:
+                    _accumulatorRegistry[aid] += update
             # Write a byte in acknowledgement
             self.wfile.write(struct.pack("!b", 1))
             return False
@@ -308,7 +311,7 @@ def authenticate_and_accum_updates() -> bool:
             received_token: Union[bytes, str] = self.rfile.read(len(auth_token))
             if isinstance(received_token, bytes):
                 received_token = received_token.decode("utf-8")
-            if received_token == auth_token:
+            if hmac.compare_digest(received_token, auth_token):
                 accum_updates()
                 # we've authenticated, we can break out of the first loop now
                 return True
diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json
index 808127772f72a..38417cbf01889 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -417,7 +417,7 @@
   },
   "INVALID_MULTIPLE_ARGUMENT_CONDITIONS": {
     "message": [
-      "[{arg_names}] cannot be <condition>."
+      "[<arg_names>] cannot be <condition>."
     ]
   },
   "INVALID_NDARRAY_DIMENSION": {
@@ -602,6 +602,34 @@
       "Multiple pipeline spec files found in the directory `<dir_path>`. Please remove one or choose a particular one with the --spec argument."
     ]
   },
+  "NEAREST_BY_JOIN": {
+    "message": [
+      "Invalid nearest-by join."
+    ],
+    "sub_class": {
+      "NUM_RESULTS_OUT_OF_RANGE": {
+        "message": [
+          "The number of results <numResults> must be between <min> and <max>. Update the literal in `APPROX NEAREST <numResults> BY ...` (or `EXACT NEAREST <numResults> BY ...`) to fall within that range."
+        ]
+      },
+      "UNSUPPORTED_DIRECTION": {
+        "message": [
+          "Unsupported nearest-by join direction '<direction>'. Supported nearest-by join directions include: <supported>."
+        ]
+      },
+      "UNSUPPORTED_JOIN_TYPE": {
+        "message": [
+          "Unsupported nearest-by join type <joinType>. Supported types: <supported>."
+        ]
+      },
+      "UNSUPPORTED_MODE": {
+        "message": [
+          "Unsupported nearest-by join mode '<mode>'. Supported modes include: <supported>."
+        ]
+      }
+    },
+    "sqlState": "42604"
+  },
   "NEGATIVE_VALUE": {
     "message": [
       "Value for `<arg_name>` must be greater than or equal to 0, got '<arg_value>'."
@@ -958,6 +986,12 @@
     ],
     "sqlState" : "22023"
   },
+  "ST_INVALID_ENDIANNESS_VALUE" : {
+    "message" : [
+      "Endianness '<endianness>' must be either 'NDR' (little-endian) or 'XDR' (big-endian)."
+    ],
+    "sqlState" : "22023"
+  },
   "ST_INVALID_SRID_VALUE" : {
     "message" : [
       "Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
diff --git a/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py b/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py
index e2cde9ce7554b..ac99e113c447b 100644
--- a/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py
+++ b/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py
@@ -199,7 +199,7 @@ def test_crossvalidator_on_pipeline(self):
         )
 
         scaler = StandardScaler(inputCol="features", outputCol="scaled_features")
-        lorv2 = LORV2(numTrainWorkers=2, featuresCol="scaled_features")
+        lorv2 = LORV2(numTrainWorkers=1, featuresCol="scaled_features")
         pipeline = Pipeline(stages=[scaler, lorv2])
 
         grid2 = ParamGridBuilder().addGrid(lorv2.maxIter, [2, 3]).build()
@@ -208,6 +208,7 @@ def test_crossvalidator_on_pipeline(self):
             estimatorParamMaps=grid2,
             parallelism=2,
             evaluator=BinaryClassificationEvaluator(),
+            numFolds=2,
         )
         cv_model = cv.fit(train_dataset)
         transformed_result = (
diff --git a/python/pyspark/pandas/mlflow.py b/python/pyspark/pandas/mlflow.py
index f15ff83b11cc9..06988a2871c6b 100644
--- a/python/pyspark/pandas/mlflow.py
+++ b/python/pyspark/pandas/mlflow.py
@@ -142,7 +142,7 @@ def load_model(
     >>> import mlflow.sklearn
     >>> from tempfile import mkdtemp
     >>> d = mkdtemp("pandas_on_spark_mlflow")
-    >>> set_tracking_uri("file:%s"%d)
+    >>> set_tracking_uri(f"sqlite:///{d}/mlflow.db")
     >>> client = MlflowClient()
     >>> exp_id = mlflow.create_experiment("my_experiment")
     >>> exp = mlflow.set_experiment("my_experiment")
diff --git a/python/pyspark/pipelines/__init__.py b/python/pyspark/pipelines/__init__.py
index d93320e963766..bd41c9ecd6b2e 100644
--- a/python/pyspark/pipelines/__init__.py
+++ b/python/pyspark/pipelines/__init__.py
@@ -16,6 +16,7 @@
 #
 from pyspark.pipelines.api import (
     append_flow,
+    create_auto_cdc_flow,
     create_streaming_table,
     materialized_view,
     table,
@@ -25,6 +26,7 @@
 
 __all__ = [
     "append_flow",
+    "create_auto_cdc_flow",
     "create_streaming_table",
     "materialized_view",
     "table",
diff --git a/python/pyspark/pipelines/api.py b/python/pyspark/pipelines/api.py
index e6bae4f832d51..5cbc003708f20 100644
--- a/python/pyspark/pipelines/api.py
+++ b/python/pyspark/pipelines/api.py
@@ -14,12 +14,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import Callable, Dict, List, Optional, Union, overload
+from typing import Callable, Dict, List, Literal, Optional, Union, overload
 
 from pyspark.errors import PySparkTypeError
 from pyspark.pipelines.graph_element_registry import get_active_graph_element_registry
 from pyspark.pipelines.type_error_utils import validate_optional_list_of_str_arg
-from pyspark.pipelines.flow import Flow, QueryFunction
+from pyspark.pipelines.flow import AutoCdcFlow, Flow, QueryFunction
 from pyspark.pipelines.source_code_location import (
     get_caller_source_code_location,
 )
@@ -29,6 +29,7 @@
     TemporaryView,
     Sink,
 )
+from pyspark.sql import Column
 from pyspark.sql.types import StructType
 
 
@@ -525,3 +526,201 @@ def create_sink(
         comment=None,
     )
     get_active_graph_element_registry().register_output(sink)
+
+
+def create_auto_cdc_flow(
+    target: str,
+    source: str,
+    keys: Union[List[str], List[Column]],
+    sequence_by: Union[str, Column],
+    apply_as_deletes: Optional[Union[str, Column]] = None,
+    column_list: Optional[Union[List[str], List[Column]]] = None,
+    except_column_list: Optional[Union[List[str], List[Column]]] = None,
+    stored_as_scd_type: Optional[Literal[1, "1"]] = None,
+    name: Optional[str] = None,
+) -> None:
+    """
+    Create an Auto CDC flow into the target table from the Change Data Capture (CDC) source.
+    Target table must have already been created using the `create_streaming_table` function.
+    Only one of column_list and except_column_list can be specified.
+
+    Example:
+        create_auto_cdc_flow(
+            target="target",
+            source="source",
+            keys=["key"],
+            sequence_by="sequence_expr",
+            column_list=["key", "value"],
+        )
+
+    Note that for keys, sequence_by, column_list, and except_column_list the arguments have to
+    be column identifiers without qualifiers, e.g. they cannot be col("sourceTable.keyId").
+
+    The set and types of `keys` are part of the Auto CDC flow's persisted state. Changing keys
+    across incremental runs (renaming, swapping, growing, shrinking, or changing the type of a
+    key column) is not supported and will produce undefined behavior. To change the key set,
+    fully refresh the target table.
+
+    :param target: The name of the target table that receives the Auto CDC flow.
+    :param source: The name of the CDC source to stream from.
+    :param keys: The column or combination of columns that uniquely identify a row in the source \
+        data. This is used to identify which CDC events apply to specific records in the target \
+        table. These keys also identify records in the target table, e.g., if there exists a record \
+        for given keys and the CDC source has an UPSERT operation for the same keys, we will update \
+        the existing record. At least one key must be provided. This should be a list of column \
+        identifiers without qualifiers, expressed as either Python strings or PySpark Columns.
+    :param sequence_by: An expression that we use to order the source data. This can be expressed \
+        as either a SQL expression string or a PySpark Column.
+    :param apply_as_deletes: A boolean expression indicating whether an event represents a \
+        delete. This can be expressed as either a SQL expression string or a PySpark Column.
+    :param column_list: Columns that will be included in the output table. This should be a list \
+        of column identifiers without qualifiers, expressed as either Python strings or PySpark \
+        Columns. Only one of column_list and except_column_list can be specified.
+    :param except_column_list: Columns that will be excluded from the output table. This should \
+        be a list of column identifiers without qualifiers, expressed as either Python strings or \
+        PySpark Columns. Only one of column_list and except_column_list can be specified. When \
+        this is specified, all columns in the `DataFrame` of the target table except those in \
+        this list will be in the output table.
+    :param stored_as_scd_type: The SCD type for the target table. Only 1 (or "1") is supported. \
+        When not specified, the server default applies.
+    :param name: The name of the flow for this create_auto_cdc_flow command. When unspecified, \
+        this will build a "default flow" with name equal to the target name.
+    """
+    # Lazy import: pyspark.sql.connect.functions.builtin transitively imports grpc, which is
+    # not available in the docs-build environment. pyspark.pipelines.api is loaded eagerly
+    # from pyspark.pipelines.__init__, so a top-level import here would break docs CI.
+    from pyspark.sql.connect.functions.builtin import expr as _connect_expr
+
+    if type(target) is not str:
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": "target",
+                "expected_type": "str",
+                "arg_type": type(target).__name__,
+            },
+        )
+    if type(source) is not str:
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": "source",
+                "expected_type": "str",
+                "arg_type": type(source).__name__,
+            },
+        )
+    if name is not None and type(name) is not str:
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": "name",
+                "expected_type": "str",
+                "arg_type": type(name).__name__,
+            },
+        )
+
+    if name is None:
+        name = target
+
+    keys = _normalize_column_list(arg_name="keys", column_list=keys)
+    column_list = _normalize_optional_column_list(arg_name="column_list", column_list=column_list)
+    except_column_list = _normalize_optional_column_list(
+        arg_name="except_column_list", column_list=except_column_list
+    )
+
+    if isinstance(sequence_by, str):
+        sequence_by = _connect_expr(sequence_by)
+    elif not isinstance(sequence_by, Column):
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": "sequence_by",
+                "expected_type": "str or Column",
+                "arg_type": type(sequence_by).__name__,
+            },
+        )
+
+    if isinstance(apply_as_deletes, str):
+        apply_as_deletes = _connect_expr(apply_as_deletes)
+    elif apply_as_deletes is not None and not isinstance(apply_as_deletes, Column):
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": "apply_as_deletes",
+                "expected_type": "str or Column",
+                "arg_type": type(apply_as_deletes).__name__,
+            },
+        )
+
+    if stored_as_scd_type is not None and str(stored_as_scd_type) != "1":
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": "stored_as_scd_type",
+                "expected_type": "Literal[1, '1']",
+                "arg_type": type(stored_as_scd_type).__name__,
+            },
+        )
+
+    source_code_location = get_caller_source_code_location(stacklevel=1)
+
+    flow = AutoCdcFlow(
+        name=name,
+        target=target,
+        source=source,
+        keys=keys,
+        sequence_by=sequence_by,
+        apply_as_deletes=apply_as_deletes,
+        column_list=column_list,
+        except_column_list=except_column_list,
+        stored_as_scd_type=stored_as_scd_type,
+        source_code_location=source_code_location,
+    )
+
+    get_active_graph_element_registry().register_auto_cdc_flow(flow)
+
+
+def _normalize_optional_column_list(
+    arg_name: str,
+    column_list: Optional[Union[List[str], List[Column]]],
+) -> Optional[List[Column]]:
+    if column_list is None:
+        return None
+    return _normalize_column_list(arg_name=arg_name, column_list=column_list)
+
+
+def _normalize_column_list(
+    arg_name: str,
+    column_list: Union[List[str], List[Column]],
+) -> List[Column]:
+    # Lazy import: see comment in create_auto_cdc_flow.
+    from pyspark.sql.connect.functions.builtin import col as _connect_col
+
+    if not isinstance(column_list, list):
+        raise PySparkTypeError(
+            errorClass="NOT_EXPECTED_TYPE",
+            messageParameters={
+                "arg_name": arg_name,
+                "expected_type": "list[str] or list[Column]",
+                "arg_type": type(column_list).__name__,
+            },
+        )
+
+    normalized: List[Column] = []
+
+    for column in column_list:
+        if isinstance(column, str):
+            normalized.append(_connect_col(column))
+        elif isinstance(column, Column):
+            normalized.append(column)
+        else:
+            raise PySparkTypeError(
+                errorClass="NOT_EXPECTED_TYPE",
+                messageParameters={
+                    "arg_name": arg_name,
+                    "expected_type": "list[str] or list[Column]",
+                    "arg_type": type(column).__name__,
+                },
+            )
+
+    return normalized
diff --git a/python/pyspark/pipelines/cli.py b/python/pyspark/pipelines/cli.py
index 986e9828c9a26..bc0d718f8a758 100644
--- a/python/pyspark/pipelines/cli.py
+++ b/python/pyspark/pipelines/cli.py
@@ -253,6 +253,7 @@ def register_definitions(
                         assert module_spec.loader is not None, (
                             f"Module spec has no loader for {file}"
                         )
+                        module.__dict__["spark"] = spark
                         with add_pipeline_analysis_context(
                             spark=spark, dataflow_graph_id=dataflow_graph_id, flow_name=None
                         ):
diff --git a/python/pyspark/pipelines/flow.py b/python/pyspark/pipelines/flow.py
index 7c499c0b36221..b1922454a551f 100644
--- a/python/pyspark/pipelines/flow.py
+++ b/python/pyspark/pipelines/flow.py
@@ -15,9 +15,10 @@
 # limitations under the License.
 #
 from dataclasses import dataclass
-from typing import Callable, Dict
+from typing import Callable, Dict, List, Literal, Optional
 
 from pyspark.sql import DataFrame
+from pyspark.sql import Column
 from pyspark.pipelines.source_code_location import SourceCodeLocation
 
 QueryFunction = Callable[[], DataFrame]
@@ -41,3 +42,35 @@ class Flow:
     spark_conf: Dict[str, str]
     source_code_location: SourceCodeLocation
     func: QueryFunction
+
+
+@dataclass(frozen=True)
+class AutoCdcFlow:
+    """Definition of an Auto CDC flow in a pipeline dataflow graph.
+
+    An Auto CDC flow applies Change Data Capture (CDC) events from a source to a target
+    streaming table.
+
+    :param name: Optional name of the flow. When None, defaults to the target name.
+    :param target: The name of the target streaming table.
+    :param source: The name of the CDC source to stream from.
+    :param keys: Column(s) that uniquely identify a row in source and target data.
+    :param sequence_by: Expression used to order the source data.
+    :param apply_as_deletes: Optional delete condition for the merge operation.
+    :param column_list: Optional columns to include in the output table.
+    :param except_column_list: Optional columns to exclude from the output table.
+    :param stored_as_scd_type: Optional SCD type for the target table. Only 1 (or "1") is \
+        supported.
+    :param source_code_location: The location of the source code that created this flow.
+    """
+
+    name: Optional[str]
+    target: str
+    source: str
+    keys: List[Column]
+    sequence_by: Column
+    apply_as_deletes: Optional[Column]
+    column_list: Optional[List[Column]]
+    except_column_list: Optional[List[Column]]
+    stored_as_scd_type: Optional[Literal[1, "1"]]
+    source_code_location: SourceCodeLocation
diff --git a/python/pyspark/pipelines/graph_element_registry.py b/python/pyspark/pipelines/graph_element_registry.py
index 8e311fc2ca98e..4eddabaabda0e 100644
--- a/python/pyspark/pipelines/graph_element_registry.py
+++ b/python/pyspark/pipelines/graph_element_registry.py
@@ -19,7 +19,7 @@
 from pathlib import Path
 
 from pyspark.pipelines.output import Output
-from pyspark.pipelines.flow import Flow
+from pyspark.pipelines.flow import AutoCdcFlow, Flow
 from contextlib import contextmanager
 from contextvars import ContextVar
 from typing import Generator, Optional
@@ -42,6 +42,10 @@ def register_output(self, output: Output) -> None:
     def register_flow(self, flow: Flow) -> None:
         """Add the given flow to the registry."""
 
+    @abstractmethod
+    def register_auto_cdc_flow(self, flow: AutoCdcFlow) -> None:
+        """Add the given Auto CDC flow to the registry."""
+
     @abstractmethod
     def register_sql(self, sql_text: str, file_path: Path) -> None:
         """Register a string containing SQL statements the dataflow graph.
diff --git a/python/pyspark/pipelines/init_cli.py b/python/pyspark/pipelines/init_cli.py
index a1dbdfd9d5586..18bbb70ed9c18 100644
--- a/python/pyspark/pipelines/init_cli.py
+++ b/python/pyspark/pipelines/init_cli.py
@@ -26,9 +26,7 @@
 """
 
 PYTHON_EXAMPLE = """from pyspark import pipelines as dp
-from pyspark.sql import DataFrame, SparkSession
-
-spark = SparkSession.active()
+from pyspark.sql import DataFrame
 
 @dp.materialized_view
 def example_python_materialized_view() -> DataFrame:
diff --git a/python/pyspark/pipelines/source_code_location.py b/python/pyspark/pipelines/source_code_location.py
index cbf4cbe514a69..e4f0e99b1dd43 100644
--- a/python/pyspark/pipelines/source_code_location.py
+++ b/python/pyspark/pipelines/source_code_location.py
@@ -28,35 +28,11 @@ class SourceCodeLocation:
 
 def get_caller_source_code_location(stacklevel: int) -> SourceCodeLocation:
     """
-    Returns a SourceCodeLocation object representing the location code that invokes this function.
+    Returns a SourceCodeLocation object representing the location of the code that invokes
+    this function.
 
-    If this function is called from a decorator (ex. @sdp.table), note that the returned line
-    number is affected by how the decorator was triggered - i.e. whether @sdp.table or @sdp.table()
-    was called - AND what python version is being used
-
-    Case 1:
-    |@sdp.table()
-    |def fn
-
-    @sdp.table() is executed immediately, on line 1. This is true for all python versions.
-
-    Case 2:
-    |@sdp.table
-    |def fn
-
-    In python < 3.10, @sdp.table will expand to fn = sdp.table(fn), replacing the line that `fn` is
-    defined on. This would be line 2. More interestingly, this means:
-
-    |@sdp.table
-    |
-    |
-    |def fn
-
-    Will expand to fn = sdp.table(fn) on line 4, where `fn` is defined.
-
-    However, in python 3.10+, the line number in the stack trace will still be the line that the
-    decorator was defined on. In other words, case 2 will be treated the same as case 1, and the
-    line number will be 1.
+    When called from a decorator (e.g. ``@sdp.table`` or ``@sdp.table()``), the returned
+    line number is the line on which the decorator was applied.
 
     :param stacklevel: The number of stack frames to go up. 0 means the direct caller of this
         function, 1 means the caller of the caller, and so on.
diff --git a/python/pyspark/pipelines/spark_connect_graph_element_registry.py b/python/pyspark/pipelines/spark_connect_graph_element_registry.py
index ab88317908302..2eef264990a31 100644
--- a/python/pyspark/pipelines/spark_connect_graph_element_registry.py
+++ b/python/pyspark/pipelines/spark_connect_graph_element_registry.py
@@ -17,7 +17,7 @@
 from pathlib import Path
 
 from pyspark.errors import PySparkTypeError
-from pyspark.sql import SparkSession
+from pyspark.sql import SparkSession, Column
 from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
 from pyspark.pipelines.output import (
     Output,
@@ -27,12 +27,12 @@
     StreamingTable,
     TemporaryView,
 )
-from pyspark.pipelines.flow import Flow
+from pyspark.pipelines.flow import AutoCdcFlow, Flow
 from pyspark.pipelines.graph_element_registry import GraphElementRegistry
 from pyspark.pipelines.source_code_location import SourceCodeLocation
 from pyspark.sql.connect.types import pyspark_types_to_proto_types
 from pyspark.sql.types import StructType
-from typing import Any, cast
+from typing import Any, List, Optional, cast
 import pyspark.sql.connect.proto as pb2
 from pyspark.pipelines.add_pipeline_analysis_context import add_pipeline_analysis_context
 
@@ -133,6 +133,40 @@ def register_flow(self, flow: Flow) -> None:
         command.pipeline_command.define_flow.CopyFrom(inner_command)
         self._client.execute_command(command)
 
+    def register_auto_cdc_flow(self, flow: AutoCdcFlow) -> None:
+        from pyspark.sql.connect.column import Column as ConnectColumn
+
+        def to_plan(col: Column) -> Any:
+            return cast(ConnectColumn, col).to_plan(self._client)
+
+        def to_plans(cols: Optional[List[Column]]) -> list:
+            return [] if cols is None else [to_plan(c) for c in cols]
+
+        auto_cdc_details = pb2.PipelineCommand.DefineFlow.AutoCdcFlowDetails(
+            source=flow.source,
+            keys=to_plans(flow.keys),
+            sequence_by=to_plan(flow.sequence_by),
+            column_list=to_plans(flow.column_list),
+            except_column_list=to_plans(flow.except_column_list),
+        )
+        if flow.stored_as_scd_type is not None:
+            auto_cdc_details.stored_as_scd_type = pb2.PipelineCommand.DefineFlow.SCDType.SCD_TYPE_1
+        if flow.apply_as_deletes is not None:
+            auto_cdc_details.apply_as_deletes.CopyFrom(to_plan(flow.apply_as_deletes))
+
+        inner_command = pb2.PipelineCommand.DefineFlow(
+            dataflow_graph_id=self._dataflow_graph_id,
+            flow_name=flow.name,
+            target_dataset_name=flow.target,
+            auto_cdc_flow_details=auto_cdc_details,
+            sql_conf={},
+            source_code_location=source_code_location_to_proto(flow.source_code_location),
+        )
+
+        command = pb2.Command()
+        command.pipeline_command.define_flow.CopyFrom(inner_command)
+        self._client.execute_command(command)
+
     def register_sql(self, sql_text: str, file_path: Path) -> None:
         inner_command = pb2.PipelineCommand.DefineSqlGraphElements(
             dataflow_graph_id=self._dataflow_graph_id,
diff --git a/python/pyspark/pipelines/tests/local_graph_element_registry.py b/python/pyspark/pipelines/tests/local_graph_element_registry.py
index 0e22641930b9a..3b9ea15a1ed6b 100644
--- a/python/pyspark/pipelines/tests/local_graph_element_registry.py
+++ b/python/pyspark/pipelines/tests/local_graph_element_registry.py
@@ -20,7 +20,7 @@
 from typing import List, Sequence
 
 from pyspark.pipelines.output import Output
-from pyspark.pipelines.flow import Flow
+from pyspark.pipelines.flow import AutoCdcFlow, Flow
 from pyspark.pipelines.graph_element_registry import GraphElementRegistry
 
 
@@ -34,6 +34,7 @@ class LocalGraphElementRegistry(GraphElementRegistry):
     def __init__(self) -> None:
         self._outputs: List[Output] = []
         self._flows: List[Flow] = []
+        self._auto_cdc_flows: List[AutoCdcFlow] = []
         self._sql_files: List[SqlFile] = []
 
     def register_output(self, output: Output) -> None:
@@ -42,6 +43,9 @@ def register_output(self, output: Output) -> None:
     def register_flow(self, flow: Flow) -> None:
         self._flows.append(flow)
 
+    def register_auto_cdc_flow(self, flow: AutoCdcFlow) -> None:
+        self._auto_cdc_flows.append(flow)
+
     def register_sql(self, sql_text: str, file_path: Path) -> None:
         self._sql_files.append(SqlFile(sql_text, file_path))
 
@@ -53,6 +57,10 @@ def outputs(self) -> Sequence[Output]:
     def flows(self) -> Sequence[Flow]:
         return self._flows
 
+    @property
+    def auto_cdc_flows(self) -> Sequence[AutoCdcFlow]:
+        return self._auto_cdc_flows
+
     @property
     def sql_files(self) -> Sequence[SqlFile]:
         return self._sql_files
diff --git a/python/pyspark/pipelines/tests/test_graph_element_registry.py b/python/pyspark/pipelines/tests/test_graph_element_registry.py
index 1e6fcf224a0ac..fd8ed439b130b 100644
--- a/python/pyspark/pipelines/tests/test_graph_element_registry.py
+++ b/python/pyspark/pipelines/tests/test_graph_element_registry.py
@@ -17,11 +17,14 @@
 
 import unittest
 
-from pyspark.errors import PySparkException
+from pyspark.errors import PySparkException, PySparkTypeError
 from pyspark.pipelines.graph_element_registry import graph_element_registration_context
 from pyspark import pipelines as dp
+from pyspark.pipelines.flow import AutoCdcFlow
 from pyspark.pipelines.output import Sink
 from pyspark.pipelines.tests.local_graph_element_registry import LocalGraphElementRegistry
+from pyspark.sql import Column
+from pyspark.sql.connect.functions.builtin import col, expr
 from typing import cast
 
 
@@ -97,6 +100,147 @@ def flow2():
         self.assertEqual(sink_obj.options["key1"], "value1")
         assert sink_obj.source_code_location.filename.endswith("test_graph_element_registry.py")
 
+    def test_create_auto_cdc_flow(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("target")
+            dp.create_auto_cdc_flow(
+                target="target",
+                source="source",
+                keys=[col("key")],
+                sequence_by=expr("seq"),
+            )
+
+        self.assertEqual(len(registry.outputs), 1)
+        self.assertEqual(len(registry.auto_cdc_flows), 1)
+
+        flow = cast(AutoCdcFlow, registry.auto_cdc_flows[0])
+        self.assertEqual(flow.target, "target")
+        self.assertEqual(flow.source, "source")
+
+        # When name is not specified, it inherits the target's name at construction time.
+        self.assertEqual(flow.name, "target")
+        self.assertIsNone(flow.stored_as_scd_type)
+        self.assertIsNone(flow.apply_as_deletes)
+        assert flow.source_code_location.filename.endswith("test_graph_element_registry.py")
+
+    def test_create_auto_cdc_flow_with_all_args(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("tgt")
+            dp.create_auto_cdc_flow(
+                target="tgt",
+                source="src",
+                keys=[col("id")],
+                sequence_by=expr("ts"),
+                apply_as_deletes=expr("op = 'DELETE'"),
+                column_list=[col("id"), col("val")],
+                stored_as_scd_type=1,
+                name="my_flow",
+            )
+
+        flow = cast(AutoCdcFlow, registry.auto_cdc_flows[0])
+        self.assertEqual(flow.name, "my_flow")
+        self.assertEqual(flow.stored_as_scd_type, 1)
+
+    def test_create_auto_cdc_flow_with_string_args(self):
+        # Verify that string forms of column / expression arguments are normalized to
+        # PySpark Columns, equivalent to passing col(...) / expr(...) directly.
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("tgt")
+            dp.create_auto_cdc_flow(
+                target="tgt",
+                source="src",
+                keys=["id"],
+                sequence_by="ts",
+                apply_as_deletes="op = 'DELETE'",
+                column_list=["id", "val"],
+            )
+
+        flow = cast(AutoCdcFlow, registry.auto_cdc_flows[0])
+        for k in flow.keys:
+            self.assertIsInstance(k, Column)
+        self.assertIsInstance(flow.sequence_by, Column)
+        self.assertIsInstance(flow.apply_as_deletes, Column)
+        assert flow.column_list is not None
+        for c in flow.column_list:
+            self.assertIsInstance(c, Column)
+
+    def test_create_auto_cdc_flow_stored_as_scd_type_string(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("t")
+            dp.create_auto_cdc_flow(
+                target="t",
+                source="s",
+                keys=[col("k")],
+                sequence_by=expr("seq"),
+                stored_as_scd_type="1",
+            )
+
+        flow = cast(AutoCdcFlow, registry.auto_cdc_flows[0])
+        self.assertEqual(flow.stored_as_scd_type, "1")
+
+    def test_create_auto_cdc_flow_invalid_scd_type(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("t")
+            with self.assertRaises(PySparkTypeError) as ctx:
+                dp.create_auto_cdc_flow(
+                    target="t",
+                    source="s",
+                    keys=[col("k")],
+                    sequence_by=expr("seq"),
+                    stored_as_scd_type=2,  # type: ignore[arg-type]
+                )
+            self.assertEqual(ctx.exception.getCondition(), "NOT_EXPECTED_TYPE")
+
+    def test_create_auto_cdc_flow_with_except_column_list(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("tgt")
+            dp.create_auto_cdc_flow(
+                target="tgt",
+                source="src",
+                keys=[col("id")],
+                sequence_by=expr("ts"),
+                except_column_list=["op", "ts"],
+            )
+
+        flow = cast(AutoCdcFlow, registry.auto_cdc_flows[0])
+        self.assertIsNone(flow.column_list)
+        assert flow.except_column_list is not None
+        self.assertEqual(len(flow.except_column_list), 2)
+        for c in flow.except_column_list:
+            self.assertIsInstance(c, Column)
+
+    def test_create_auto_cdc_flow_rejects_non_str_target(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("tgt")
+            with self.assertRaises(PySparkTypeError) as ctx:
+                dp.create_auto_cdc_flow(
+                    target=123,  # type: ignore[arg-type]
+                    source="src",
+                    keys=[col("id")],
+                    sequence_by=expr("ts"),
+                )
+            self.assertEqual(ctx.exception.getCondition(), "NOT_EXPECTED_TYPE")
+
+    def test_create_auto_cdc_flow_rejects_invalid_key_element(self):
+        registry = LocalGraphElementRegistry()
+        with graph_element_registration_context(registry):
+            dp.create_streaming_table("tgt")
+            with self.assertRaises(PySparkTypeError) as ctx:
+                dp.create_auto_cdc_flow(
+                    target="tgt",
+                    source="src",
+                    keys=[123],  # type: ignore[list-item]
+                    sequence_by=expr("ts"),
+                )
+            self.assertEqual(ctx.exception.getCondition(), "NOT_EXPECTED_TYPE")
+
     def test_definition_without_graph_element_registry(self):
         for decorator in [dp.table, dp.temporary_view, dp.materialized_view]:
             with self.assertRaises(PySparkException) as context:
@@ -129,6 +273,19 @@ def b():
             "GRAPH_ELEMENT_DEFINED_OUTSIDE_OF_DECLARATIVE_PIPELINE",
         )
 
+        with self.assertRaises(PySparkException) as context:
+            dp.create_auto_cdc_flow(
+                target="t",
+                source="s",
+                keys=["k"],
+                sequence_by="seq",
+            )
+
+        self.assertEqual(
+            context.exception.getCondition(),
+            "GRAPH_ELEMENT_DEFINED_OUTSIDE_OF_DECLARATIVE_PIPELINE",
+        )
+
 
 if __name__ == "__main__":
     from pyspark.testing import main
diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi
index b3b31962ad45b..94e3ccf770939 100644
--- a/python/pyspark/sql/_typing.pyi
+++ b/python/pyspark/sql/_typing.pyi
@@ -19,9 +19,11 @@
 from typing import (
     Any,
     Callable,
+    Dict,
     List,
     Optional,
     Tuple,
+    TypedDict,
     TypeVar,
     Union,
 )
@@ -29,8 +31,10 @@ from typing_extensions import Literal, Protocol
 
 import datetime
 import decimal
+import pstats
 
 from pyspark._typing import PrimitiveType
+from pyspark.profiler import CodeMapDict
 import pyspark.sql.types
 from pyspark.sql.column import Column
 from pyspark.sql.tvf_argument import TableValuedFunctionArgument
@@ -81,3 +85,11 @@ class UserDefinedFunctionLike(Protocol):
     def returnType(self) -> pyspark.sql.types.DataType: ...
     def __call__(self, *args: ColumnOrName) -> Column: ...
     def asNondeterministic(self) -> UserDefinedFunctionLike: ...
+
+ProfileResults = Dict[Union[int, str], Tuple[Optional[pstats.Stats], Optional[CodeMapDict]]]
+
+class ProfileResult(TypedDict, total=False):
+    perf: pstats.Stats
+    memory: CodeMapDict
+
+ProfileResultsV2 = Dict[Union[int, str], ProfileResult]
diff --git a/python/pyspark/sql/classic/dataframe.py b/python/pyspark/sql/classic/dataframe.py
index 3501f01135d80..727af95485944 100644
--- a/python/pyspark/sql/classic/dataframe.py
+++ b/python/pyspark/sql/classic/dataframe.py
@@ -813,6 +813,21 @@ def lateralJoin(
             jdf = self._jdf.lateralJoin(other._jdf, on._jc, how)
         return DataFrame(jdf, self.sparkSession)
 
+    def nearestByJoin(
+        self,
+        other: ParentDataFrame,
+        rankingExpression: Column,
+        numResults: int,
+        mode: str,
+        direction: str,
+        *,
+        joinType: str = "inner",
+    ) -> ParentDataFrame:
+        jdf = self._jdf.nearestByJoin(
+            other._jdf, rankingExpression._jc, int(numResults), mode, direction, joinType
+        )
+        return DataFrame(jdf, self.sparkSession)
+
     # TODO(SPARK-22947): Fix the DataFrame API.
     def _joinAsOf(
         self,
diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index 8e4fa1073fb6d..d4991cd2a4125 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -62,8 +62,12 @@ def get(
         self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue
     ) -> Optional[str]:
         """
-        Returns the value of Spark runtime configuration property for the given key,
-        assuming it is set.
+        Returns the value of Spark runtime configuration property for the given key.
+
+        If ``default`` is not provided and the key is not explicitly set, returns the key's
+        built-in default value if one exists, otherwise raises an exception. If ``default``
+        is provided and the key is not explicitly set, returns ``default`` instead of
+        the key's built-in default value (if any).
 
         .. versionadded:: 2.0.0
 
@@ -72,19 +76,43 @@ def get(
         key : str
             key of the configuration to get.
         default : str, optional
-            value of the configuration to get if the key does not exist.
+            value to return if the key is not explicitly set. When provided, this overrides
+            the key's built-in default value.
 
         Returns
         -------
-        The string value of the configuration set, or None.
+        str or None
+            The value of the configuration property.
+
+        Raises
+        ------
+        pyspark.errors.SparkNoSuchElementException
+            If the key is not explicitly set, has no built-in default value, and ``default``
+            is not provided.
 
         Examples
         --------
+        A key with no built-in default returns the provided ``default`` when not explicitly set:
+
         >>> spark.conf.get("non-existent-key", "my_default")
         'my_default'
+
+        An explicitly set key returns its value:
+
         >>> spark.conf.set("my_key", "my_value")
         >>> spark.conf.get("my_key")
         'my_value'
+
+        A key with a built-in default returns that default when not explicitly set:
+
+        >>> spark.conf.unset("spark.sql.sources.partitionOverwriteMode")
+        >>> spark.conf.get("spark.sql.sources.partitionOverwriteMode")
+        'STATIC'
+
+        Providing ``default`` overrides the built-in default, not just the absence of a value:
+
+        >>> spark.conf.get("spark.sql.sources.partitionOverwriteMode", "DYNAMIC")
+        'DYNAMIC'
         """
         self._check_type(key, "key")
         if default is _NoValue:
diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py
index 18628e8f9bb5d..efd3b3309198b 100644
--- a/python/pyspark/sql/connect/client/core.py
+++ b/python/pyspark/sql/connect/client/core.py
@@ -1578,7 +1578,7 @@ def handle_response(
                         if observed_metrics.name == "__python_accumulator__":
                             for metric in observed_metrics.metrics:
                                 aid, update = pickleSer.loads(LiteralExpression._to_value(metric))
-                                if aid == SpecialAccumulatorIds.SQL_UDF_PROFIER:
+                                if aid == SpecialAccumulatorIds.SQL_UDF_PROFIER_V2:
                                     self._profiler_collector._update(update)
                         elif observed_metrics.name in observations:
                             observation_result = observations[observed_metrics.name]._result
@@ -2300,8 +2300,15 @@ def _delete_ml_cache(self, cache_ids: List[str], evict_only: bool = False) -> Li
             return []
 
     def _on_exit(self) -> None:
+        # If the client has already been explicitly closed, skip all cleanup RPCs.
+        # The server-side resources were released by close(); reissuing them here
+        # is wasted work and, if the server has since become unreachable, can
+        # block process exit on the gRPC call.
+        if self._closed:
+            return
+
         self._cleanup_ml_cache()
-        if self._release_session_on_exit and not self._closed:
+        if self._release_session_on_exit:
             try:
                 self.release_session()
             except Exception:
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index c6602e08fac4c..b0a9692f289ad 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -726,6 +726,30 @@ def lateralJoin(
             session=self._session,
         )
 
+    def nearestByJoin(
+        self,
+        other: ParentDataFrame,
+        rankingExpression: Column,
+        numResults: int,
+        mode: str,
+        direction: str,
+        *,
+        joinType: str = "inner",
+    ) -> ParentDataFrame:
+        other = self._check_same_session(other)
+        return DataFrame(
+            plan.NearestByJoin(
+                left=self._plan,
+                right=other._plan,
+                ranking_expression=rankingExpression,
+                num_results=int(numResults),
+                join_type=joinType,
+                mode=mode,
+                direction=direction,
+            ),
+            session=self._session,
+        )
+
     def _joinAsOf(
         self,
         other: ParentDataFrame,
diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py
index 30227ba5fcf1c..22e52d91232cd 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -2189,6 +2189,13 @@ def is_variant_null(v: "ColumnOrName") -> Column:
 is_variant_null.__doc__ = pysparkfuncs.is_variant_null.__doc__
 
 
+def is_valid_variant(v: "ColumnOrName") -> Column:
+    return _invoke_function("is_valid_variant", _to_col(v))
+
+
+is_valid_variant.__doc__ = pysparkfuncs.is_valid_variant.__doc__
+
+
 def variant_get(v: "ColumnOrName", path: Union[Column, str], targetType: str) -> Column:
     assert isinstance(path, (Column, str))
     if isinstance(path, str):
@@ -3728,6 +3735,19 @@ def timestamp_add(unit: str, quantity: "ColumnOrName", ts: "ColumnOrName") -> Co
 timestamp_add.__doc__ = pysparkfuncs.timestamp_add.__doc__
 
 
+def time_bucket(
+    bucket_size: "Column",
+    ts: "ColumnOrName",
+    origin: Optional["Column"] = None,
+) -> Column:
+    if origin is None:
+        return _invoke_function_over_columns("time_bucket", bucket_size, ts)
+    return _invoke_function_over_columns("time_bucket", bucket_size, ts, origin)
+
+
+time_bucket.__doc__ = pysparkfuncs.time_bucket.__doc__
+
+
 def window(
     timeColumn: "ColumnOrName",
     windowDuration: str,
@@ -5397,8 +5417,12 @@ def bitmap_and_agg(col: "ColumnOrName") -> Column:
 # Geospatial ST Functions
 
 
-def st_asbinary(geo: "ColumnOrName") -> Column:
-    return _invoke_function_over_columns("st_asbinary", geo)
+def st_asbinary(geo: "ColumnOrName", endianness: Optional["ColumnOrName"] = None) -> Column:
+    if endianness is None:
+        return _invoke_function_over_columns("st_asbinary", geo)
+    else:
+        _endianness = lit(endianness) if isinstance(endianness, str) else endianness
+        return _invoke_function_over_columns("st_asbinary", geo, _endianness)
 
 
 st_asbinary.__doc__ = pysparkfuncs.st_asbinary.__doc__
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 65fce72475b58..540d81ffc6907 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -1345,6 +1345,108 @@ def _repr_html_(self) -> str:
         """
 
 
+# Acceptance lists for `nearestByJoin`. Must stay aligned with `NearestByJoinValidation` in
+# `sql/api/.../catalyst/plans/NearestByJoinValidation.scala`.
+_NEAREST_BY_JOIN_MAX_NUM_RESULTS = 100000
+_NEAREST_BY_JOIN_SUPPORTED_JOIN_TYPES = frozenset({"inner", "leftouter", "left"})
+_NEAREST_BY_JOIN_SUPPORTED_JOIN_TYPE_DISPLAY = "'INNER', 'LEFT OUTER'"
+_NEAREST_BY_JOIN_SUPPORTED_MODES = ("approx", "exact")
+_NEAREST_BY_JOIN_SUPPORTED_DIRECTIONS = ("distance", "similarity")
+
+
+class NearestByJoin(LogicalPlan):
+    def __init__(
+        self,
+        left: Optional[LogicalPlan],
+        right: LogicalPlan,
+        ranking_expression: Column,
+        num_results: int,
+        join_type: str,
+        mode: str,
+        direction: str,
+    ) -> None:
+        super().__init__(left, self._collect_references([ranking_expression]))
+        self.left = cast(LogicalPlan, left)
+        self.right = right
+        self.ranking_expression = ranking_expression
+        # Mirror of the Scala `Dataset.validateNearestByJoinArgs` validator -- raises the same
+        # `NEAREST_BY_JOIN.*` error classes the server would, so the user sees a consistent
+        # error regardless of where the check fires.
+        if num_results < 1 or num_results > _NEAREST_BY_JOIN_MAX_NUM_RESULTS:
+            raise AnalysisException(
+                errorClass="NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+                messageParameters={
+                    "numResults": str(num_results),
+                    "min": "1",
+                    "max": str(_NEAREST_BY_JOIN_MAX_NUM_RESULTS),
+                },
+            )
+        if join_type.lower().replace("_", "") not in _NEAREST_BY_JOIN_SUPPORTED_JOIN_TYPES:
+            raise AnalysisException(
+                errorClass="NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+                messageParameters={
+                    "joinType": join_type,
+                    "supported": _NEAREST_BY_JOIN_SUPPORTED_JOIN_TYPE_DISPLAY,
+                },
+            )
+        if mode.lower() not in _NEAREST_BY_JOIN_SUPPORTED_MODES:
+            raise AnalysisException(
+                errorClass="NEAREST_BY_JOIN.UNSUPPORTED_MODE",
+                messageParameters={
+                    "mode": mode,
+                    "supported": "'" + "', '".join(_NEAREST_BY_JOIN_SUPPORTED_MODES) + "'",
+                },
+            )
+        if direction.lower() not in _NEAREST_BY_JOIN_SUPPORTED_DIRECTIONS:
+            raise AnalysisException(
+                errorClass="NEAREST_BY_JOIN.UNSUPPORTED_DIRECTION",
+                messageParameters={
+                    "direction": direction,
+                    "supported": "'" + "', '".join(_NEAREST_BY_JOIN_SUPPORTED_DIRECTIONS) + "'",
+                },
+            )
+        self.num_results = int(num_results)
+        self.join_type = join_type
+        self.mode = mode
+        self.direction = direction
+
+    def plan(self, session: "SparkConnectClient") -> proto.Relation:
+        plan = self._create_proto_relation()
+        plan.nearest_by_join.left.CopyFrom(self.left.plan(session))
+        plan.nearest_by_join.right.CopyFrom(self.right.plan(session))
+        plan.nearest_by_join.ranking_expression.CopyFrom(self.ranking_expression.to_plan(session))
+        plan.nearest_by_join.num_results = self.num_results
+        plan.nearest_by_join.join_type = self.join_type
+        plan.nearest_by_join.mode = self.mode
+        plan.nearest_by_join.direction = self.direction
+        return self._with_relations(plan, session)
+
+    @property
+    def observations(self) -> Dict[str, "Observation"]:
+        return {**super().observations, **self.right.observations}
+
+    def print(self, indent: int = 0) -> str:
+        i = " " * indent
+        o = " " * (indent + LogicalPlan.INDENT)
+        n = indent + LogicalPlan.INDENT * 2
+        return (
+            f"{i}<NearestByJoin numResults={self.num_results} joinType={self.join_type} "
+            f"mode={self.mode} direction={self.direction}>\n{o}"
+            f"left=\n{self.left.print(n)}\n{o}right=\n{self.right.print(n)}"
+        )
+
+    def _repr_html_(self) -> str:
+        return f"""
+        <ul>
+            <li>
+                <b>NearestByJoin</b><br />
+                Left: {self.left._repr_html_()}
+                Right: {self.right._repr_html_()}
+            </li>
+        </uL>
+        """
+
+
 class SetOperation(LogicalPlan):
     def __init__(
         self,
@@ -2002,6 +2104,7 @@ def __init__(self, child: "LogicalPlan") -> None:
         self.options: Dict[str, Optional[str]] = {}
         self.num_buckets: int = -1
         self.bucket_cols: List[str] = []
+        self.with_schema_evolution: bool = False
 
     def command(self, session: "SparkConnectClient") -> proto.Command:
         assert self._child is not None
@@ -2013,6 +2116,7 @@ def command(self, session: "SparkConnectClient") -> proto.Command:
         plan.write_operation.sort_column_names.extend(self.sort_cols)
         plan.write_operation.partitioning_columns.extend(self.partitioning_cols)
         plan.write_operation.clustering_columns.extend(self.clustering_cols)
+        plan.write_operation.with_schema_evolution = self.with_schema_evolution
 
         if self.num_buckets > 0:
             plan.write_operation.bucket_by.bucket_column_names.extend(self.bucket_cols)
@@ -2104,6 +2208,7 @@ def __init__(self, child: "LogicalPlan", table_name: str) -> None:
         self.table_properties: dict[str, Optional[str]] = {}
         self.mode: Optional[str] = None
         self.overwrite_condition: Optional[Column] = None
+        self.with_schema_evolution: bool = False
 
     def command(self, session: "SparkConnectClient") -> proto.Command:
         assert self._child is not None
@@ -2118,6 +2223,7 @@ def command(self, session: "SparkConnectClient") -> proto.Command:
             [c.to_plan(session) for c in self.partitioning_columns]
         )
         plan.write_operation_v2.clustering_columns.extend(self.clustering_columns)
+        plan.write_operation_v2.with_schema_evolution = self.with_schema_evolution
 
         for k in self.options:
             if self.options[k] is None:
diff --git a/python/pyspark/sql/connect/profiler.py b/python/pyspark/sql/connect/profiler.py
index 73993aa128fb4..8c14f6a17bcca 100644
--- a/python/pyspark/sql/connect/profiler.py
+++ b/python/pyspark/sql/connect/profiler.py
@@ -16,10 +16,10 @@
 #
 from typing import TYPE_CHECKING
 
-from pyspark.sql.profiler import ProfilerCollector, ProfileResultsParam
+from pyspark.sql.profiler import ProfilerCollector, ProfileResultsParamV2
 
 if TYPE_CHECKING:
-    from pyspark.sql.profiler import ProfileResults
+    from pyspark.sql._typing import ProfileResultsV2
 
 
 class ConnectProfilerCollector(ProfilerCollector):
@@ -29,13 +29,13 @@ class ConnectProfilerCollector(ProfilerCollector):
 
     def __init__(self) -> None:
         super().__init__()
-        self._value = ProfileResultsParam.zero({})
+        self._value = ProfileResultsParamV2.zero({})
 
     @property
-    def _profile_results(self) -> "ProfileResults":
+    def _profile_results(self) -> "ProfileResultsV2":
         with self._lock:
             return self._value if self._value is not None else {}
 
-    def _update(self, update: "ProfileResults") -> None:
+    def _update(self, update: "ProfileResultsV2") -> None:
         with self._lock:
-            self._value = ProfileResultsParam.addInPlace(self._profile_results, update)
+            self._value = ProfileResultsParamV2.addInPlace(self._profile_results, update)
diff --git a/python/pyspark/sql/connect/proto/commands_pb2.py b/python/pyspark/sql/connect/proto/commands_pb2.py
index a1f005adf9e93..78af0ddbfe318 100644
--- a/python/pyspark/sql/connect/proto/commands_pb2.py
+++ b/python/pyspark/sql/connect/proto/commands_pb2.py
@@ -44,7 +44,7 @@
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1cspark/connect/commands.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x16spark/connect/ml.proto\x1a\x1dspark/connect/pipelines.proto"\xfb\x0e\n\x07\x43ommand\x12]\n\x11register_function\x18\x01 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x10registerFunction\x12H\n\x0fwrite_operation\x18\x02 \x01(\x0b\x32\x1d.spark.connect.WriteOperationH\x00R\x0ewriteOperation\x12_\n\x15\x63reate_dataframe_view\x18\x03 \x01(\x0b\x32).spark.connect.CreateDataFrameViewCommandH\x00R\x13\x63reateDataframeView\x12O\n\x12write_operation_v2\x18\x04 \x01(\x0b\x32\x1f.spark.connect.WriteOperationV2H\x00R\x10writeOperationV2\x12<\n\x0bsql_command\x18\x05 \x01(\x0b\x32\x19.spark.connect.SqlCommandH\x00R\nsqlCommand\x12k\n\x1cwrite_stream_operation_start\x18\x06 \x01(\x0b\x32(.spark.connect.WriteStreamOperationStartH\x00R\x19writeStreamOperationStart\x12^\n\x17streaming_query_command\x18\x07 \x01(\x0b\x32$.spark.connect.StreamingQueryCommandH\x00R\x15streamingQueryCommand\x12X\n\x15get_resources_command\x18\x08 \x01(\x0b\x32".spark.connect.GetResourcesCommandH\x00R\x13getResourcesCommand\x12t\n\x1fstreaming_query_manager_command\x18\t \x01(\x0b\x32+.spark.connect.StreamingQueryManagerCommandH\x00R\x1cstreamingQueryManagerCommand\x12m\n\x17register_table_function\x18\n \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R\x15registerTableFunction\x12\x81\x01\n$streaming_query_listener_bus_command\x18\x0b \x01(\x0b\x32/.spark.connect.StreamingQueryListenerBusCommandH\x00R streamingQueryListenerBusCommand\x12\x64\n\x14register_data_source\x18\x0c \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R\x12registerDataSource\x12t\n\x1f\x63reate_resource_profile_command\x18\r \x01(\x0b\x32+.spark.connect.CreateResourceProfileCommandH\x00R\x1c\x63reateResourceProfileCommand\x12Q\n\x12\x63heckpoint_command\x18\x0e \x01(\x0b\x32 .spark.connect.CheckpointCommandH\x00R\x11\x63heckpointCommand\x12\x84\x01\n%remove_cached_remote_relation_command\x18\x0f \x01(\x0b\x32\x30.spark.connect.RemoveCachedRemoteRelationCommandH\x00R!removeCachedRemoteRelationCommand\x12_\n\x18merge_into_table_command\x18\x10 \x01(\x0b\x32$.spark.connect.MergeIntoTableCommandH\x00R\x15mergeIntoTableCommand\x12\x39\n\nml_command\x18\x11 \x01(\x0b\x32\x18.spark.connect.MlCommandH\x00R\tmlCommand\x12\x61\n\x18\x65xecute_external_command\x18\x12 \x01(\x0b\x32%.spark.connect.ExecuteExternalCommandH\x00R\x16\x65xecuteExternalCommand\x12K\n\x10pipeline_command\x18\x13 \x01(\x0b\x32\x1e.spark.connect.PipelineCommandH\x00R\x0fpipelineCommand\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x0e\n\x0c\x63ommand_type"\xaa\x04\n\nSqlCommand\x12\x14\n\x03sql\x18\x01 \x01(\tB\x02\x18\x01R\x03sql\x12;\n\x04\x61rgs\x18\x02 \x03(\x0b\x32#.spark.connect.SqlCommand.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12Z\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32-.spark.connect.SqlCommand.NamedArgumentsEntryB\x02\x18\x01R\x0enamedArguments\x12\x42\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionB\x02\x18\x01R\x0cposArguments\x12-\n\x05input\x18\x06 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"\x96\x01\n\x1a\x43reateDataFrameViewCommand\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x1b\n\tis_global\x18\x03 \x01(\x08R\x08isGlobal\x12\x18\n\x07replace\x18\x04 \x01(\x08R\x07replace"\xca\x08\n\x0eWriteOperation\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1b\n\x06source\x18\x02 \x01(\tH\x01R\x06source\x88\x01\x01\x12\x14\n\x04path\x18\x03 \x01(\tH\x00R\x04path\x12?\n\x05table\x18\x04 \x01(\x0b\x32\'.spark.connect.WriteOperation.SaveTableH\x00R\x05table\x12:\n\x04mode\x18\x05 \x01(\x0e\x32&.spark.connect.WriteOperation.SaveModeR\x04mode\x12*\n\x11sort_column_names\x18\x06 \x03(\tR\x0fsortColumnNames\x12\x31\n\x14partitioning_columns\x18\x07 \x03(\tR\x13partitioningColumns\x12\x43\n\tbucket_by\x18\x08 \x01(\x0b\x32&.spark.connect.WriteOperation.BucketByR\x08\x62ucketBy\x12\x44\n\x07options\x18\t \x03(\x0b\x32*.spark.connect.WriteOperation.OptionsEntryR\x07options\x12-\n\x12\x63lustering_columns\x18\n \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x82\x02\n\tSaveTable\x12\x1d\n\ntable_name\x18\x01 \x01(\tR\ttableName\x12X\n\x0bsave_method\x18\x02 \x01(\x0e\x32\x37.spark.connect.WriteOperation.SaveTable.TableSaveMethodR\nsaveMethod"|\n\x0fTableSaveMethod\x12!\n\x1dTABLE_SAVE_METHOD_UNSPECIFIED\x10\x00\x12#\n\x1fTABLE_SAVE_METHOD_SAVE_AS_TABLE\x10\x01\x12!\n\x1dTABLE_SAVE_METHOD_INSERT_INTO\x10\x02\x1a[\n\x08\x42ucketBy\x12.\n\x13\x62ucket_column_names\x18\x01 \x03(\tR\x11\x62ucketColumnNames\x12\x1f\n\x0bnum_buckets\x18\x02 \x01(\x05R\nnumBuckets"\x89\x01\n\x08SaveMode\x12\x19\n\x15SAVE_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10SAVE_MODE_APPEND\x10\x01\x12\x17\n\x13SAVE_MODE_OVERWRITE\x10\x02\x12\x1d\n\x19SAVE_MODE_ERROR_IF_EXISTS\x10\x03\x12\x14\n\x10SAVE_MODE_IGNORE\x10\x04\x42\x0b\n\tsave_typeB\t\n\x07_source"\xdc\x06\n\x10WriteOperationV2\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\ntable_name\x18\x02 \x01(\tR\ttableName\x12\x1f\n\x08provider\x18\x03 \x01(\tH\x00R\x08provider\x88\x01\x01\x12L\n\x14partitioning_columns\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13partitioningColumns\x12\x46\n\x07options\x18\x05 \x03(\x0b\x32,.spark.connect.WriteOperationV2.OptionsEntryR\x07options\x12_\n\x10table_properties\x18\x06 \x03(\x0b\x32\x34.spark.connect.WriteOperationV2.TablePropertiesEntryR\x0ftableProperties\x12\x38\n\x04mode\x18\x07 \x01(\x0e\x32$.spark.connect.WriteOperationV2.ModeR\x04mode\x12J\n\x13overwrite_condition\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x12overwriteCondition\x12-\n\x12\x63lustering_columns\x18\t \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x9f\x01\n\x04Mode\x12\x14\n\x10MODE_UNSPECIFIED\x10\x00\x12\x0f\n\x0bMODE_CREATE\x10\x01\x12\x12\n\x0eMODE_OVERWRITE\x10\x02\x12\x1d\n\x19MODE_OVERWRITE_PARTITIONS\x10\x03\x12\x0f\n\x0bMODE_APPEND\x10\x04\x12\x10\n\x0cMODE_REPLACE\x10\x05\x12\x1a\n\x16MODE_CREATE_OR_REPLACE\x10\x06\x42\x0b\n\t_provider"\x93\x07\n\x19WriteStreamOperationStart\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06\x66ormat\x18\x02 \x01(\tR\x06\x66ormat\x12O\n\x07options\x18\x03 \x03(\x0b\x32\x35.spark.connect.WriteStreamOperationStart.OptionsEntryR\x07options\x12:\n\x19partitioning_column_names\x18\x04 \x03(\tR\x17partitioningColumnNames\x12:\n\x18processing_time_interval\x18\x05 \x01(\tH\x00R\x16processingTimeInterval\x12%\n\ravailable_now\x18\x06 \x01(\x08H\x00R\x0c\x61vailableNow\x12\x14\n\x04once\x18\x07 \x01(\x08H\x00R\x04once\x12\x46\n\x1e\x63ontinuous_checkpoint_interval\x18\x08 \x01(\tH\x00R\x1c\x63ontinuousCheckpointInterval\x12\x39\n\x18real_time_batch_duration\x18\x64 \x01(\tH\x00R\x15realTimeBatchDuration\x12\x1f\n\x0boutput_mode\x18\t \x01(\tR\noutputMode\x12\x1d\n\nquery_name\x18\n \x01(\tR\tqueryName\x12\x14\n\x04path\x18\x0b \x01(\tH\x01R\x04path\x12\x1f\n\ntable_name\x18\x0c \x01(\tH\x01R\ttableName\x12N\n\x0e\x66oreach_writer\x18\r \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\rforeachWriter\x12L\n\rforeach_batch\x18\x0e \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\x0c\x66oreachBatch\x12\x36\n\x17\x63lustering_column_names\x18\x0f \x03(\tR\x15\x63lusteringColumnNames\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07triggerB\x12\n\x10sink_destination"\xb3\x01\n\x18StreamingForeachFunction\x12\x43\n\x0fpython_function\x18\x01 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x0epythonFunction\x12\x46\n\x0escala_function\x18\x02 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\rscalaFunctionB\n\n\x08\x66unction"\xd4\x01\n\x1fWriteStreamOperationStartResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12<\n\x18query_started_event_json\x18\x03 \x01(\tH\x00R\x15queryStartedEventJson\x88\x01\x01\x42\x1b\n\x19_query_started_event_json"A\n\x18StreamingQueryInstanceId\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x15\n\x06run_id\x18\x02 \x01(\tR\x05runId"\xf8\x04\n\x15StreamingQueryCommand\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x18\n\x06status\x18\x02 \x01(\x08H\x00R\x06status\x12%\n\rlast_progress\x18\x03 \x01(\x08H\x00R\x0clastProgress\x12)\n\x0frecent_progress\x18\x04 \x01(\x08H\x00R\x0erecentProgress\x12\x14\n\x04stop\x18\x05 \x01(\x08H\x00R\x04stop\x12\x34\n\x15process_all_available\x18\x06 \x01(\x08H\x00R\x13processAllAvailable\x12O\n\x07\x65xplain\x18\x07 \x01(\x0b\x32\x33.spark.connect.StreamingQueryCommand.ExplainCommandH\x00R\x07\x65xplain\x12\x1e\n\texception\x18\x08 \x01(\x08H\x00R\texception\x12k\n\x11\x61wait_termination\x18\t \x01(\x0b\x32<.spark.connect.StreamingQueryCommand.AwaitTerminationCommandH\x00R\x10\x61waitTermination\x1a,\n\x0e\x45xplainCommand\x12\x1a\n\x08\x65xtended\x18\x01 \x01(\x08R\x08\x65xtended\x1aL\n\x17\x41waitTerminationCommand\x12"\n\ntimeout_ms\x18\x02 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_msB\t\n\x07\x63ommand"\xf5\x08\n\x1bStreamingQueryCommandResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12Q\n\x06status\x18\x02 \x01(\x0b\x32\x37.spark.connect.StreamingQueryCommandResult.StatusResultH\x00R\x06status\x12j\n\x0frecent_progress\x18\x03 \x01(\x0b\x32?.spark.connect.StreamingQueryCommandResult.RecentProgressResultH\x00R\x0erecentProgress\x12T\n\x07\x65xplain\x18\x04 \x01(\x0b\x32\x38.spark.connect.StreamingQueryCommandResult.ExplainResultH\x00R\x07\x65xplain\x12Z\n\texception\x18\x05 \x01(\x0b\x32:.spark.connect.StreamingQueryCommandResult.ExceptionResultH\x00R\texception\x12p\n\x11\x61wait_termination\x18\x06 \x01(\x0b\x32\x41.spark.connect.StreamingQueryCommandResult.AwaitTerminationResultH\x00R\x10\x61waitTermination\x1a\xaa\x01\n\x0cStatusResult\x12%\n\x0estatus_message\x18\x01 \x01(\tR\rstatusMessage\x12*\n\x11is_data_available\x18\x02 \x01(\x08R\x0fisDataAvailable\x12*\n\x11is_trigger_active\x18\x03 \x01(\x08R\x0fisTriggerActive\x12\x1b\n\tis_active\x18\x04 \x01(\x08R\x08isActive\x1aH\n\x14RecentProgressResult\x12\x30\n\x14recent_progress_json\x18\x05 \x03(\tR\x12recentProgressJson\x1a\'\n\rExplainResult\x12\x16\n\x06result\x18\x01 \x01(\tR\x06result\x1a\xc5\x01\n\x0f\x45xceptionResult\x12\x30\n\x11\x65xception_message\x18\x01 \x01(\tH\x00R\x10\x65xceptionMessage\x88\x01\x01\x12$\n\x0b\x65rror_class\x18\x02 \x01(\tH\x01R\nerrorClass\x88\x01\x01\x12$\n\x0bstack_trace\x18\x03 \x01(\tH\x02R\nstackTrace\x88\x01\x01\x42\x14\n\x12_exception_messageB\x0e\n\x0c_error_classB\x0e\n\x0c_stack_trace\x1a\x38\n\x16\x41waitTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminatedB\r\n\x0bresult_type"\xbd\x06\n\x1cStreamingQueryManagerCommand\x12\x18\n\x06\x61\x63tive\x18\x01 \x01(\x08H\x00R\x06\x61\x63tive\x12\x1d\n\tget_query\x18\x02 \x01(\tH\x00R\x08getQuery\x12|\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32\x46.spark.connect.StreamingQueryManagerCommand.AwaitAnyTerminationCommandH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12n\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0b\x61\x64\x64Listener\x12t\n\x0fremove_listener\x18\x06 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0eremoveListener\x12\'\n\x0elist_listeners\x18\x07 \x01(\x08H\x00R\rlistListeners\x1aO\n\x1a\x41waitAnyTerminationCommand\x12"\n\ntimeout_ms\x18\x01 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_ms\x1a\xcd\x01\n\x1dStreamingQueryListenerCommand\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x12U\n\x17python_listener_payload\x18\x02 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x15pythonListenerPayload\x88\x01\x01\x12\x0e\n\x02id\x18\x03 \x01(\tR\x02idB\x1a\n\x18_python_listener_payloadB\t\n\x07\x63ommand"\xb4\x08\n"StreamingQueryManagerCommandResult\x12X\n\x06\x61\x63tive\x18\x01 \x01(\x0b\x32>.spark.connect.StreamingQueryManagerCommandResult.ActiveResultH\x00R\x06\x61\x63tive\x12`\n\x05query\x18\x02 \x01(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceH\x00R\x05query\x12\x81\x01\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32K.spark.connect.StreamingQueryManagerCommandResult.AwaitAnyTerminationResultH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12#\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x08H\x00R\x0b\x61\x64\x64Listener\x12)\n\x0fremove_listener\x18\x06 \x01(\x08H\x00R\x0eremoveListener\x12{\n\x0elist_listeners\x18\x07 \x01(\x0b\x32R.spark.connect.StreamingQueryManagerCommandResult.ListStreamingQueryListenerResultH\x00R\rlistListeners\x1a\x7f\n\x0c\x41\x63tiveResult\x12o\n\x0e\x61\x63tive_queries\x18\x01 \x03(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceR\ractiveQueries\x1as\n\x16StreamingQueryInstance\x12\x37\n\x02id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x02id\x12\x17\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x88\x01\x01\x42\x07\n\x05_name\x1a;\n\x19\x41waitAnyTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminated\x1aK\n\x1eStreamingQueryListenerInstance\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x1a\x45\n ListStreamingQueryListenerResult\x12!\n\x0clistener_ids\x18\x01 \x03(\tR\x0blistenerIdsB\r\n\x0bresult_type"\xad\x01\n StreamingQueryListenerBusCommand\x12;\n\x19\x61\x64\x64_listener_bus_listener\x18\x01 \x01(\x08H\x00R\x16\x61\x64\x64ListenerBusListener\x12\x41\n\x1cremove_listener_bus_listener\x18\x02 \x01(\x08H\x00R\x19removeListenerBusListenerB\t\n\x07\x63ommand"\x83\x01\n\x1bStreamingQueryListenerEvent\x12\x1d\n\nevent_json\x18\x01 \x01(\tR\teventJson\x12\x45\n\nevent_type\x18\x02 \x01(\x0e\x32&.spark.connect.StreamingQueryEventTypeR\teventType"\xcc\x01\n"StreamingQueryListenerEventsResult\x12\x42\n\x06\x65vents\x18\x01 \x03(\x0b\x32*.spark.connect.StreamingQueryListenerEventR\x06\x65vents\x12\x42\n\x1blistener_bus_listener_added\x18\x02 \x01(\x08H\x00R\x18listenerBusListenerAdded\x88\x01\x01\x42\x1e\n\x1c_listener_bus_listener_added"\x15\n\x13GetResourcesCommand"\xd4\x01\n\x19GetResourcesCommandResult\x12U\n\tresources\x18\x01 \x03(\x0b\x32\x37.spark.connect.GetResourcesCommandResult.ResourcesEntryR\tresources\x1a`\n\x0eResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.ResourceInformationR\x05value:\x02\x38\x01"X\n\x1c\x43reateResourceProfileCommand\x12\x38\n\x07profile\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ResourceProfileR\x07profile"C\n"CreateResourceProfileCommandResult\x12\x1d\n\nprofile_id\x18\x01 \x01(\x05R\tprofileId"d\n!RemoveCachedRemoteRelationCommand\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation"\xcd\x01\n\x11\x43heckpointCommand\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x14\n\x05local\x18\x02 \x01(\x08R\x05local\x12\x14\n\x05\x65\x61ger\x18\x03 \x01(\x08R\x05\x65\x61ger\x12\x45\n\rstorage_level\x18\x04 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level"\xe8\x03\n\x15MergeIntoTableCommand\x12*\n\x11target_table_name\x18\x01 \x01(\tR\x0ftargetTableName\x12\x43\n\x11source_table_plan\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x0fsourceTablePlan\x12\x42\n\x0fmerge_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0emergeCondition\x12>\n\rmatch_actions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cmatchActions\x12I\n\x13not_matched_actions\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x11notMatchedActions\x12[\n\x1dnot_matched_by_source_actions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x19notMatchedBySourceActions\x12\x32\n\x15with_schema_evolution\x18\x07 \x01(\x08R\x13withSchemaEvolution"\xd4\x01\n\x16\x45xecuteExternalCommand\x12\x16\n\x06runner\x18\x01 \x01(\tR\x06runner\x12\x18\n\x07\x63ommand\x18\x02 \x01(\tR\x07\x63ommand\x12L\n\x07options\x18\x03 \x03(\x0b\x32\x32.spark.connect.ExecuteExternalCommand.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01*\x85\x01\n\x17StreamingQueryEventType\x12\x1e\n\x1aQUERY_PROGRESS_UNSPECIFIED\x10\x00\x12\x18\n\x14QUERY_PROGRESS_EVENT\x10\x01\x12\x1a\n\x16QUERY_TERMINATED_EVENT\x10\x02\x12\x14\n\x10QUERY_IDLE_EVENT\x10\x03\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3'
+    b'\n\x1cspark/connect/commands.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x16spark/connect/ml.proto\x1a\x1dspark/connect/pipelines.proto"\xfb\x0e\n\x07\x43ommand\x12]\n\x11register_function\x18\x01 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x10registerFunction\x12H\n\x0fwrite_operation\x18\x02 \x01(\x0b\x32\x1d.spark.connect.WriteOperationH\x00R\x0ewriteOperation\x12_\n\x15\x63reate_dataframe_view\x18\x03 \x01(\x0b\x32).spark.connect.CreateDataFrameViewCommandH\x00R\x13\x63reateDataframeView\x12O\n\x12write_operation_v2\x18\x04 \x01(\x0b\x32\x1f.spark.connect.WriteOperationV2H\x00R\x10writeOperationV2\x12<\n\x0bsql_command\x18\x05 \x01(\x0b\x32\x19.spark.connect.SqlCommandH\x00R\nsqlCommand\x12k\n\x1cwrite_stream_operation_start\x18\x06 \x01(\x0b\x32(.spark.connect.WriteStreamOperationStartH\x00R\x19writeStreamOperationStart\x12^\n\x17streaming_query_command\x18\x07 \x01(\x0b\x32$.spark.connect.StreamingQueryCommandH\x00R\x15streamingQueryCommand\x12X\n\x15get_resources_command\x18\x08 \x01(\x0b\x32".spark.connect.GetResourcesCommandH\x00R\x13getResourcesCommand\x12t\n\x1fstreaming_query_manager_command\x18\t \x01(\x0b\x32+.spark.connect.StreamingQueryManagerCommandH\x00R\x1cstreamingQueryManagerCommand\x12m\n\x17register_table_function\x18\n \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R\x15registerTableFunction\x12\x81\x01\n$streaming_query_listener_bus_command\x18\x0b \x01(\x0b\x32/.spark.connect.StreamingQueryListenerBusCommandH\x00R streamingQueryListenerBusCommand\x12\x64\n\x14register_data_source\x18\x0c \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R\x12registerDataSource\x12t\n\x1f\x63reate_resource_profile_command\x18\r \x01(\x0b\x32+.spark.connect.CreateResourceProfileCommandH\x00R\x1c\x63reateResourceProfileCommand\x12Q\n\x12\x63heckpoint_command\x18\x0e \x01(\x0b\x32 .spark.connect.CheckpointCommandH\x00R\x11\x63heckpointCommand\x12\x84\x01\n%remove_cached_remote_relation_command\x18\x0f \x01(\x0b\x32\x30.spark.connect.RemoveCachedRemoteRelationCommandH\x00R!removeCachedRemoteRelationCommand\x12_\n\x18merge_into_table_command\x18\x10 \x01(\x0b\x32$.spark.connect.MergeIntoTableCommandH\x00R\x15mergeIntoTableCommand\x12\x39\n\nml_command\x18\x11 \x01(\x0b\x32\x18.spark.connect.MlCommandH\x00R\tmlCommand\x12\x61\n\x18\x65xecute_external_command\x18\x12 \x01(\x0b\x32%.spark.connect.ExecuteExternalCommandH\x00R\x16\x65xecuteExternalCommand\x12K\n\x10pipeline_command\x18\x13 \x01(\x0b\x32\x1e.spark.connect.PipelineCommandH\x00R\x0fpipelineCommand\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x0e\n\x0c\x63ommand_type"\xaa\x04\n\nSqlCommand\x12\x14\n\x03sql\x18\x01 \x01(\tB\x02\x18\x01R\x03sql\x12;\n\x04\x61rgs\x18\x02 \x03(\x0b\x32#.spark.connect.SqlCommand.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12Z\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32-.spark.connect.SqlCommand.NamedArgumentsEntryB\x02\x18\x01R\x0enamedArguments\x12\x42\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionB\x02\x18\x01R\x0cposArguments\x12-\n\x05input\x18\x06 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"\x96\x01\n\x1a\x43reateDataFrameViewCommand\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x1b\n\tis_global\x18\x03 \x01(\x08R\x08isGlobal\x12\x18\n\x07replace\x18\x04 \x01(\x08R\x07replace"\xfe\x08\n\x0eWriteOperation\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1b\n\x06source\x18\x02 \x01(\tH\x01R\x06source\x88\x01\x01\x12\x14\n\x04path\x18\x03 \x01(\tH\x00R\x04path\x12?\n\x05table\x18\x04 \x01(\x0b\x32\'.spark.connect.WriteOperation.SaveTableH\x00R\x05table\x12:\n\x04mode\x18\x05 \x01(\x0e\x32&.spark.connect.WriteOperation.SaveModeR\x04mode\x12*\n\x11sort_column_names\x18\x06 \x03(\tR\x0fsortColumnNames\x12\x31\n\x14partitioning_columns\x18\x07 \x03(\tR\x13partitioningColumns\x12\x43\n\tbucket_by\x18\x08 \x01(\x0b\x32&.spark.connect.WriteOperation.BucketByR\x08\x62ucketBy\x12\x44\n\x07options\x18\t \x03(\x0b\x32*.spark.connect.WriteOperation.OptionsEntryR\x07options\x12-\n\x12\x63lustering_columns\x18\n \x03(\tR\x11\x63lusteringColumns\x12\x32\n\x15with_schema_evolution\x18\x0b \x01(\x08R\x13withSchemaEvolution\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x82\x02\n\tSaveTable\x12\x1d\n\ntable_name\x18\x01 \x01(\tR\ttableName\x12X\n\x0bsave_method\x18\x02 \x01(\x0e\x32\x37.spark.connect.WriteOperation.SaveTable.TableSaveMethodR\nsaveMethod"|\n\x0fTableSaveMethod\x12!\n\x1dTABLE_SAVE_METHOD_UNSPECIFIED\x10\x00\x12#\n\x1fTABLE_SAVE_METHOD_SAVE_AS_TABLE\x10\x01\x12!\n\x1dTABLE_SAVE_METHOD_INSERT_INTO\x10\x02\x1a[\n\x08\x42ucketBy\x12.\n\x13\x62ucket_column_names\x18\x01 \x03(\tR\x11\x62ucketColumnNames\x12\x1f\n\x0bnum_buckets\x18\x02 \x01(\x05R\nnumBuckets"\x89\x01\n\x08SaveMode\x12\x19\n\x15SAVE_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10SAVE_MODE_APPEND\x10\x01\x12\x17\n\x13SAVE_MODE_OVERWRITE\x10\x02\x12\x1d\n\x19SAVE_MODE_ERROR_IF_EXISTS\x10\x03\x12\x14\n\x10SAVE_MODE_IGNORE\x10\x04\x42\x0b\n\tsave_typeB\t\n\x07_source"\x90\x07\n\x10WriteOperationV2\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\ntable_name\x18\x02 \x01(\tR\ttableName\x12\x1f\n\x08provider\x18\x03 \x01(\tH\x00R\x08provider\x88\x01\x01\x12L\n\x14partitioning_columns\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13partitioningColumns\x12\x46\n\x07options\x18\x05 \x03(\x0b\x32,.spark.connect.WriteOperationV2.OptionsEntryR\x07options\x12_\n\x10table_properties\x18\x06 \x03(\x0b\x32\x34.spark.connect.WriteOperationV2.TablePropertiesEntryR\x0ftableProperties\x12\x38\n\x04mode\x18\x07 \x01(\x0e\x32$.spark.connect.WriteOperationV2.ModeR\x04mode\x12J\n\x13overwrite_condition\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x12overwriteCondition\x12-\n\x12\x63lustering_columns\x18\t \x03(\tR\x11\x63lusteringColumns\x12\x32\n\x15with_schema_evolution\x18\n \x01(\x08R\x13withSchemaEvolution\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x9f\x01\n\x04Mode\x12\x14\n\x10MODE_UNSPECIFIED\x10\x00\x12\x0f\n\x0bMODE_CREATE\x10\x01\x12\x12\n\x0eMODE_OVERWRITE\x10\x02\x12\x1d\n\x19MODE_OVERWRITE_PARTITIONS\x10\x03\x12\x0f\n\x0bMODE_APPEND\x10\x04\x12\x10\n\x0cMODE_REPLACE\x10\x05\x12\x1a\n\x16MODE_CREATE_OR_REPLACE\x10\x06\x42\x0b\n\t_provider"\x93\x07\n\x19WriteStreamOperationStart\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06\x66ormat\x18\x02 \x01(\tR\x06\x66ormat\x12O\n\x07options\x18\x03 \x03(\x0b\x32\x35.spark.connect.WriteStreamOperationStart.OptionsEntryR\x07options\x12:\n\x19partitioning_column_names\x18\x04 \x03(\tR\x17partitioningColumnNames\x12:\n\x18processing_time_interval\x18\x05 \x01(\tH\x00R\x16processingTimeInterval\x12%\n\ravailable_now\x18\x06 \x01(\x08H\x00R\x0c\x61vailableNow\x12\x14\n\x04once\x18\x07 \x01(\x08H\x00R\x04once\x12\x46\n\x1e\x63ontinuous_checkpoint_interval\x18\x08 \x01(\tH\x00R\x1c\x63ontinuousCheckpointInterval\x12\x39\n\x18real_time_batch_duration\x18\x64 \x01(\tH\x00R\x15realTimeBatchDuration\x12\x1f\n\x0boutput_mode\x18\t \x01(\tR\noutputMode\x12\x1d\n\nquery_name\x18\n \x01(\tR\tqueryName\x12\x14\n\x04path\x18\x0b \x01(\tH\x01R\x04path\x12\x1f\n\ntable_name\x18\x0c \x01(\tH\x01R\ttableName\x12N\n\x0e\x66oreach_writer\x18\r \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\rforeachWriter\x12L\n\rforeach_batch\x18\x0e \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\x0c\x66oreachBatch\x12\x36\n\x17\x63lustering_column_names\x18\x0f \x03(\tR\x15\x63lusteringColumnNames\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07triggerB\x12\n\x10sink_destination"\xb3\x01\n\x18StreamingForeachFunction\x12\x43\n\x0fpython_function\x18\x01 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x0epythonFunction\x12\x46\n\x0escala_function\x18\x02 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\rscalaFunctionB\n\n\x08\x66unction"\xd4\x01\n\x1fWriteStreamOperationStartResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12<\n\x18query_started_event_json\x18\x03 \x01(\tH\x00R\x15queryStartedEventJson\x88\x01\x01\x42\x1b\n\x19_query_started_event_json"A\n\x18StreamingQueryInstanceId\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x15\n\x06run_id\x18\x02 \x01(\tR\x05runId"\xf8\x04\n\x15StreamingQueryCommand\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x18\n\x06status\x18\x02 \x01(\x08H\x00R\x06status\x12%\n\rlast_progress\x18\x03 \x01(\x08H\x00R\x0clastProgress\x12)\n\x0frecent_progress\x18\x04 \x01(\x08H\x00R\x0erecentProgress\x12\x14\n\x04stop\x18\x05 \x01(\x08H\x00R\x04stop\x12\x34\n\x15process_all_available\x18\x06 \x01(\x08H\x00R\x13processAllAvailable\x12O\n\x07\x65xplain\x18\x07 \x01(\x0b\x32\x33.spark.connect.StreamingQueryCommand.ExplainCommandH\x00R\x07\x65xplain\x12\x1e\n\texception\x18\x08 \x01(\x08H\x00R\texception\x12k\n\x11\x61wait_termination\x18\t \x01(\x0b\x32<.spark.connect.StreamingQueryCommand.AwaitTerminationCommandH\x00R\x10\x61waitTermination\x1a,\n\x0e\x45xplainCommand\x12\x1a\n\x08\x65xtended\x18\x01 \x01(\x08R\x08\x65xtended\x1aL\n\x17\x41waitTerminationCommand\x12"\n\ntimeout_ms\x18\x02 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_msB\t\n\x07\x63ommand"\xf5\x08\n\x1bStreamingQueryCommandResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12Q\n\x06status\x18\x02 \x01(\x0b\x32\x37.spark.connect.StreamingQueryCommandResult.StatusResultH\x00R\x06status\x12j\n\x0frecent_progress\x18\x03 \x01(\x0b\x32?.spark.connect.StreamingQueryCommandResult.RecentProgressResultH\x00R\x0erecentProgress\x12T\n\x07\x65xplain\x18\x04 \x01(\x0b\x32\x38.spark.connect.StreamingQueryCommandResult.ExplainResultH\x00R\x07\x65xplain\x12Z\n\texception\x18\x05 \x01(\x0b\x32:.spark.connect.StreamingQueryCommandResult.ExceptionResultH\x00R\texception\x12p\n\x11\x61wait_termination\x18\x06 \x01(\x0b\x32\x41.spark.connect.StreamingQueryCommandResult.AwaitTerminationResultH\x00R\x10\x61waitTermination\x1a\xaa\x01\n\x0cStatusResult\x12%\n\x0estatus_message\x18\x01 \x01(\tR\rstatusMessage\x12*\n\x11is_data_available\x18\x02 \x01(\x08R\x0fisDataAvailable\x12*\n\x11is_trigger_active\x18\x03 \x01(\x08R\x0fisTriggerActive\x12\x1b\n\tis_active\x18\x04 \x01(\x08R\x08isActive\x1aH\n\x14RecentProgressResult\x12\x30\n\x14recent_progress_json\x18\x05 \x03(\tR\x12recentProgressJson\x1a\'\n\rExplainResult\x12\x16\n\x06result\x18\x01 \x01(\tR\x06result\x1a\xc5\x01\n\x0f\x45xceptionResult\x12\x30\n\x11\x65xception_message\x18\x01 \x01(\tH\x00R\x10\x65xceptionMessage\x88\x01\x01\x12$\n\x0b\x65rror_class\x18\x02 \x01(\tH\x01R\nerrorClass\x88\x01\x01\x12$\n\x0bstack_trace\x18\x03 \x01(\tH\x02R\nstackTrace\x88\x01\x01\x42\x14\n\x12_exception_messageB\x0e\n\x0c_error_classB\x0e\n\x0c_stack_trace\x1a\x38\n\x16\x41waitTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminatedB\r\n\x0bresult_type"\xbd\x06\n\x1cStreamingQueryManagerCommand\x12\x18\n\x06\x61\x63tive\x18\x01 \x01(\x08H\x00R\x06\x61\x63tive\x12\x1d\n\tget_query\x18\x02 \x01(\tH\x00R\x08getQuery\x12|\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32\x46.spark.connect.StreamingQueryManagerCommand.AwaitAnyTerminationCommandH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12n\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0b\x61\x64\x64Listener\x12t\n\x0fremove_listener\x18\x06 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0eremoveListener\x12\'\n\x0elist_listeners\x18\x07 \x01(\x08H\x00R\rlistListeners\x1aO\n\x1a\x41waitAnyTerminationCommand\x12"\n\ntimeout_ms\x18\x01 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_ms\x1a\xcd\x01\n\x1dStreamingQueryListenerCommand\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x12U\n\x17python_listener_payload\x18\x02 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x15pythonListenerPayload\x88\x01\x01\x12\x0e\n\x02id\x18\x03 \x01(\tR\x02idB\x1a\n\x18_python_listener_payloadB\t\n\x07\x63ommand"\xb4\x08\n"StreamingQueryManagerCommandResult\x12X\n\x06\x61\x63tive\x18\x01 \x01(\x0b\x32>.spark.connect.StreamingQueryManagerCommandResult.ActiveResultH\x00R\x06\x61\x63tive\x12`\n\x05query\x18\x02 \x01(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceH\x00R\x05query\x12\x81\x01\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32K.spark.connect.StreamingQueryManagerCommandResult.AwaitAnyTerminationResultH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12#\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x08H\x00R\x0b\x61\x64\x64Listener\x12)\n\x0fremove_listener\x18\x06 \x01(\x08H\x00R\x0eremoveListener\x12{\n\x0elist_listeners\x18\x07 \x01(\x0b\x32R.spark.connect.StreamingQueryManagerCommandResult.ListStreamingQueryListenerResultH\x00R\rlistListeners\x1a\x7f\n\x0c\x41\x63tiveResult\x12o\n\x0e\x61\x63tive_queries\x18\x01 \x03(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceR\ractiveQueries\x1as\n\x16StreamingQueryInstance\x12\x37\n\x02id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x02id\x12\x17\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x88\x01\x01\x42\x07\n\x05_name\x1a;\n\x19\x41waitAnyTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminated\x1aK\n\x1eStreamingQueryListenerInstance\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x1a\x45\n ListStreamingQueryListenerResult\x12!\n\x0clistener_ids\x18\x01 \x03(\tR\x0blistenerIdsB\r\n\x0bresult_type"\xad\x01\n StreamingQueryListenerBusCommand\x12;\n\x19\x61\x64\x64_listener_bus_listener\x18\x01 \x01(\x08H\x00R\x16\x61\x64\x64ListenerBusListener\x12\x41\n\x1cremove_listener_bus_listener\x18\x02 \x01(\x08H\x00R\x19removeListenerBusListenerB\t\n\x07\x63ommand"\x83\x01\n\x1bStreamingQueryListenerEvent\x12\x1d\n\nevent_json\x18\x01 \x01(\tR\teventJson\x12\x45\n\nevent_type\x18\x02 \x01(\x0e\x32&.spark.connect.StreamingQueryEventTypeR\teventType"\xcc\x01\n"StreamingQueryListenerEventsResult\x12\x42\n\x06\x65vents\x18\x01 \x03(\x0b\x32*.spark.connect.StreamingQueryListenerEventR\x06\x65vents\x12\x42\n\x1blistener_bus_listener_added\x18\x02 \x01(\x08H\x00R\x18listenerBusListenerAdded\x88\x01\x01\x42\x1e\n\x1c_listener_bus_listener_added"\x15\n\x13GetResourcesCommand"\xd4\x01\n\x19GetResourcesCommandResult\x12U\n\tresources\x18\x01 \x03(\x0b\x32\x37.spark.connect.GetResourcesCommandResult.ResourcesEntryR\tresources\x1a`\n\x0eResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.ResourceInformationR\x05value:\x02\x38\x01"X\n\x1c\x43reateResourceProfileCommand\x12\x38\n\x07profile\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ResourceProfileR\x07profile"C\n"CreateResourceProfileCommandResult\x12\x1d\n\nprofile_id\x18\x01 \x01(\x05R\tprofileId"d\n!RemoveCachedRemoteRelationCommand\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation"\xcd\x01\n\x11\x43heckpointCommand\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x14\n\x05local\x18\x02 \x01(\x08R\x05local\x12\x14\n\x05\x65\x61ger\x18\x03 \x01(\x08R\x05\x65\x61ger\x12\x45\n\rstorage_level\x18\x04 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level"\xe8\x03\n\x15MergeIntoTableCommand\x12*\n\x11target_table_name\x18\x01 \x01(\tR\x0ftargetTableName\x12\x43\n\x11source_table_plan\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x0fsourceTablePlan\x12\x42\n\x0fmerge_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0emergeCondition\x12>\n\rmatch_actions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cmatchActions\x12I\n\x13not_matched_actions\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x11notMatchedActions\x12[\n\x1dnot_matched_by_source_actions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x19notMatchedBySourceActions\x12\x32\n\x15with_schema_evolution\x18\x07 \x01(\x08R\x13withSchemaEvolution"\xd4\x01\n\x16\x45xecuteExternalCommand\x12\x16\n\x06runner\x18\x01 \x01(\tR\x06runner\x12\x18\n\x07\x63ommand\x18\x02 \x01(\tR\x07\x63ommand\x12L\n\x07options\x18\x03 \x03(\x0b\x32\x32.spark.connect.ExecuteExternalCommand.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01*\x85\x01\n\x17StreamingQueryEventType\x12\x1e\n\x1aQUERY_PROGRESS_UNSPECIFIED\x10\x00\x12\x18\n\x14QUERY_PROGRESS_EVENT\x10\x01\x12\x1a\n\x16QUERY_TERMINATED_EVENT\x10\x02\x12\x14\n\x10QUERY_IDLE_EVENT\x10\x03\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3'
 )
 
 _globals = globals()
@@ -83,8 +83,8 @@
     _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_options = b"8\001"
     _globals["_EXECUTEEXTERNALCOMMAND_OPTIONSENTRY"]._loaded_options = None
     _globals["_EXECUTEEXTERNALCOMMAND_OPTIONSENTRY"]._serialized_options = b"8\001"
-    _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_start = 11816
-    _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_end = 11949
+    _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_start = 11920
+    _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_end = 12053
     _globals["_COMMAND"]._serialized_start = 222
     _globals["_COMMAND"]._serialized_end = 2137
     _globals["_SQLCOMMAND"]._serialized_start = 2140
@@ -96,105 +96,105 @@
     _globals["_CREATEDATAFRAMEVIEWCOMMAND"]._serialized_start = 2697
     _globals["_CREATEDATAFRAMEVIEWCOMMAND"]._serialized_end = 2847
     _globals["_WRITEOPERATION"]._serialized_start = 2850
-    _globals["_WRITEOPERATION"]._serialized_end = 3948
-    _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_start = 3372
-    _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_end = 3430
-    _globals["_WRITEOPERATION_SAVETABLE"]._serialized_start = 3433
-    _globals["_WRITEOPERATION_SAVETABLE"]._serialized_end = 3691
-    _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_start = 3567
-    _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_end = 3691
-    _globals["_WRITEOPERATION_BUCKETBY"]._serialized_start = 3693
-    _globals["_WRITEOPERATION_BUCKETBY"]._serialized_end = 3784
-    _globals["_WRITEOPERATION_SAVEMODE"]._serialized_start = 3787
-    _globals["_WRITEOPERATION_SAVEMODE"]._serialized_end = 3924
-    _globals["_WRITEOPERATIONV2"]._serialized_start = 3951
-    _globals["_WRITEOPERATIONV2"]._serialized_end = 4811
-    _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_start = 3372
-    _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_end = 3430
-    _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_start = 4570
-    _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_end = 4636
-    _globals["_WRITEOPERATIONV2_MODE"]._serialized_start = 4639
-    _globals["_WRITEOPERATIONV2_MODE"]._serialized_end = 4798
-    _globals["_WRITESTREAMOPERATIONSTART"]._serialized_start = 4814
-    _globals["_WRITESTREAMOPERATIONSTART"]._serialized_end = 5729
-    _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_start = 3372
-    _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_end = 3430
-    _globals["_STREAMINGFOREACHFUNCTION"]._serialized_start = 5732
-    _globals["_STREAMINGFOREACHFUNCTION"]._serialized_end = 5911
-    _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_start = 5914
-    _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_end = 6126
-    _globals["_STREAMINGQUERYINSTANCEID"]._serialized_start = 6128
-    _globals["_STREAMINGQUERYINSTANCEID"]._serialized_end = 6193
-    _globals["_STREAMINGQUERYCOMMAND"]._serialized_start = 6196
-    _globals["_STREAMINGQUERYCOMMAND"]._serialized_end = 6828
-    _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_start = 6695
-    _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_end = 6739
-    _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_start = 6741
-    _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_end = 6817
-    _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_start = 6831
-    _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_end = 7972
-    _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_start = 7414
-    _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_end = 7584
-    _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_start = 7586
-    _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_end = 7658
-    _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_start = 7660
-    _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_end = 7699
-    _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_start = 7702
-    _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_end = 7899
-    _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_start = 7901
-    _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_end = 7957
-    _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_start = 7975
-    _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_end = 8804
-    _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_start = 8506
-    _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_end = 8585
-    _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_start = 8588
-    _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_end = 8793
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_start = 8807
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_end = 9883
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_start = 9415
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_end = 9542
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_start = 9544
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_end = 9659
+    _globals["_WRITEOPERATION"]._serialized_end = 4000
+    _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_start = 3424
+    _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_end = 3482
+    _globals["_WRITEOPERATION_SAVETABLE"]._serialized_start = 3485
+    _globals["_WRITEOPERATION_SAVETABLE"]._serialized_end = 3743
+    _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_start = 3619
+    _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_end = 3743
+    _globals["_WRITEOPERATION_BUCKETBY"]._serialized_start = 3745
+    _globals["_WRITEOPERATION_BUCKETBY"]._serialized_end = 3836
+    _globals["_WRITEOPERATION_SAVEMODE"]._serialized_start = 3839
+    _globals["_WRITEOPERATION_SAVEMODE"]._serialized_end = 3976
+    _globals["_WRITEOPERATIONV2"]._serialized_start = 4003
+    _globals["_WRITEOPERATIONV2"]._serialized_end = 4915
+    _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_start = 3424
+    _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_end = 3482
+    _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_start = 4674
+    _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_end = 4740
+    _globals["_WRITEOPERATIONV2_MODE"]._serialized_start = 4743
+    _globals["_WRITEOPERATIONV2_MODE"]._serialized_end = 4902
+    _globals["_WRITESTREAMOPERATIONSTART"]._serialized_start = 4918
+    _globals["_WRITESTREAMOPERATIONSTART"]._serialized_end = 5833
+    _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_start = 3424
+    _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_end = 3482
+    _globals["_STREAMINGFOREACHFUNCTION"]._serialized_start = 5836
+    _globals["_STREAMINGFOREACHFUNCTION"]._serialized_end = 6015
+    _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_start = 6018
+    _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_end = 6230
+    _globals["_STREAMINGQUERYINSTANCEID"]._serialized_start = 6232
+    _globals["_STREAMINGQUERYINSTANCEID"]._serialized_end = 6297
+    _globals["_STREAMINGQUERYCOMMAND"]._serialized_start = 6300
+    _globals["_STREAMINGQUERYCOMMAND"]._serialized_end = 6932
+    _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_start = 6799
+    _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_end = 6843
+    _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_start = 6845
+    _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_end = 6921
+    _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_start = 6935
+    _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_end = 8076
+    _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_start = 7518
+    _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_end = 7688
+    _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_start = 7690
+    _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_end = 7762
+    _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_start = 7764
+    _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_end = 7803
+    _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_start = 7806
+    _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_end = 8003
+    _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_start = 8005
+    _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_end = 8061
+    _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_start = 8079
+    _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_end = 8908
+    _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_start = 8610
+    _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_end = 8689
+    _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_start = 8692
+    _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_end = 8897
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_start = 8911
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_end = 9987
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_start = 9519
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_end = 9646
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_start = 9648
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_end = 9763
     _globals[
         "_STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT"
-    ]._serialized_start = 9661
-    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT"]._serialized_end = 9720
+    ]._serialized_start = 9765
+    _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT"]._serialized_end = 9824
     _globals[
         "_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE"
-    ]._serialized_start = 9722
+    ]._serialized_start = 9826
     _globals[
         "_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE"
-    ]._serialized_end = 9797
+    ]._serialized_end = 9901
     _globals[
         "_STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT"
-    ]._serialized_start = 9799
+    ]._serialized_start = 9903
     _globals[
         "_STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT"
-    ]._serialized_end = 9868
-    _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_start = 9886
-    _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_end = 10059
-    _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_start = 10062
-    _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_end = 10193
-    _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_start = 10196
-    _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_end = 10400
-    _globals["_GETRESOURCESCOMMAND"]._serialized_start = 10402
-    _globals["_GETRESOURCESCOMMAND"]._serialized_end = 10423
-    _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_start = 10426
-    _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_end = 10638
-    _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_start = 10542
-    _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_end = 10638
-    _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_start = 10640
-    _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_end = 10728
-    _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_start = 10730
-    _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_end = 10797
-    _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_start = 10799
-    _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_end = 10899
-    _globals["_CHECKPOINTCOMMAND"]._serialized_start = 10902
-    _globals["_CHECKPOINTCOMMAND"]._serialized_end = 11107
-    _globals["_MERGEINTOTABLECOMMAND"]._serialized_start = 11110
-    _globals["_MERGEINTOTABLECOMMAND"]._serialized_end = 11598
-    _globals["_EXECUTEEXTERNALCOMMAND"]._serialized_start = 11601
-    _globals["_EXECUTEEXTERNALCOMMAND"]._serialized_end = 11813
-    _globals["_EXECUTEEXTERNALCOMMAND_OPTIONSENTRY"]._serialized_start = 3372
-    _globals["_EXECUTEEXTERNALCOMMAND_OPTIONSENTRY"]._serialized_end = 3430
+    ]._serialized_end = 9972
+    _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_start = 9990
+    _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_end = 10163
+    _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_start = 10166
+    _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_end = 10297
+    _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_start = 10300
+    _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_end = 10504
+    _globals["_GETRESOURCESCOMMAND"]._serialized_start = 10506
+    _globals["_GETRESOURCESCOMMAND"]._serialized_end = 10527
+    _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_start = 10530
+    _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_end = 10742
+    _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_start = 10646
+    _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_end = 10742
+    _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_start = 10744
+    _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_end = 10832
+    _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_start = 10834
+    _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_end = 10901
+    _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_start = 10903
+    _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_end = 11003
+    _globals["_CHECKPOINTCOMMAND"]._serialized_start = 11006
+    _globals["_CHECKPOINTCOMMAND"]._serialized_end = 11211
+    _globals["_MERGEINTOTABLECOMMAND"]._serialized_start = 11214
+    _globals["_MERGEINTOTABLECOMMAND"]._serialized_end = 11702
+    _globals["_EXECUTEEXTERNALCOMMAND"]._serialized_start = 11705
+    _globals["_EXECUTEEXTERNALCOMMAND"]._serialized_end = 11917
+    _globals["_EXECUTEEXTERNALCOMMAND_OPTIONSENTRY"]._serialized_start = 3424
+    _globals["_EXECUTEEXTERNALCOMMAND_OPTIONSENTRY"]._serialized_end = 3482
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/commands_pb2.pyi b/python/pyspark/sql/connect/proto/commands_pb2.pyi
index e0ead65e8922e..cc1330f11f8ac 100644
--- a/python/pyspark/sql/connect/proto/commands_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/commands_pb2.pyi
@@ -621,6 +621,7 @@ class WriteOperation(google.protobuf.message.Message):
     BUCKET_BY_FIELD_NUMBER: builtins.int
     OPTIONS_FIELD_NUMBER: builtins.int
     CLUSTERING_COLUMNS_FIELD_NUMBER: builtins.int
+    WITH_SCHEMA_EVOLUTION_FIELD_NUMBER: builtins.int
     @property
     def input(self) -> pyspark.sql.connect.proto.relations_pb2.Relation:
         """(Required) The output of the `input` relation will be persisted according to the options."""
@@ -654,6 +655,8 @@ class WriteOperation(google.protobuf.message.Message):
         self,
     ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
         """(Optional) Columns used for clustering the table."""
+    with_schema_evolution: builtins.bool
+    """(Optional) Whether schema evolution is enabled for the write."""
     def __init__(
         self,
         *,
@@ -667,6 +670,7 @@ class WriteOperation(google.protobuf.message.Message):
         bucket_by: global___WriteOperation.BucketBy | None = ...,
         options: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
         clustering_columns: collections.abc.Iterable[builtins.str] | None = ...,
+        with_schema_evolution: builtins.bool = ...,
     ) -> None: ...
     def HasField(
         self,
@@ -714,6 +718,8 @@ class WriteOperation(google.protobuf.message.Message):
             b"source",
             "table",
             b"table",
+            "with_schema_evolution",
+            b"with_schema_evolution",
         ],
     ) -> None: ...
     @typing.overload
@@ -803,6 +809,7 @@ class WriteOperationV2(google.protobuf.message.Message):
     MODE_FIELD_NUMBER: builtins.int
     OVERWRITE_CONDITION_FIELD_NUMBER: builtins.int
     CLUSTERING_COLUMNS_FIELD_NUMBER: builtins.int
+    WITH_SCHEMA_EVOLUTION_FIELD_NUMBER: builtins.int
     @property
     def input(self) -> pyspark.sql.connect.proto.relations_pb2.Relation:
         """(Required) The output of the `input` relation will be persisted according to the options."""
@@ -839,6 +846,8 @@ class WriteOperationV2(google.protobuf.message.Message):
         self,
     ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
         """(Optional) Columns used for clustering the table."""
+    with_schema_evolution: builtins.bool
+    """(Optional) Whether schema evolution is enabled for the write."""
     def __init__(
         self,
         *,
@@ -854,6 +863,7 @@ class WriteOperationV2(google.protobuf.message.Message):
         mode: global___WriteOperationV2.Mode.ValueType = ...,
         overwrite_condition: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
         clustering_columns: collections.abc.Iterable[builtins.str] | None = ...,
+        with_schema_evolution: builtins.bool = ...,
     ) -> None: ...
     def HasField(
         self,
@@ -891,6 +901,8 @@ class WriteOperationV2(google.protobuf.message.Message):
             b"table_name",
             "table_properties",
             b"table_properties",
+            "with_schema_evolution",
+            b"with_schema_evolution",
         ],
     ) -> None: ...
     def WhichOneof(
diff --git a/python/pyspark/sql/connect/proto/pipelines_pb2.py b/python/pyspark/sql/connect/proto/pipelines_pb2.py
index 4cca17b39cd65..ec8142c1d2aa6 100644
--- a/python/pyspark/sql/connect/proto/pipelines_pb2.py
+++ b/python/pyspark/sql/connect/proto/pipelines_pb2.py
@@ -38,12 +38,13 @@
 from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2
 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
 from pyspark.sql.connect.proto import common_pb2 as spark_dot_connect_dot_common__pb2
+from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_expressions__pb2
 from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2
 from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__pb2
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1dspark/connect/pipelines.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"\xa4\'\n\x0fPipelineCommand\x12h\n\x15\x63reate_dataflow_graph\x18\x01 \x01(\x0b\x32\x32.spark.connect.PipelineCommand.CreateDataflowGraphH\x00R\x13\x63reateDataflowGraph\x12R\n\rdefine_output\x18\x02 \x01(\x0b\x32+.spark.connect.PipelineCommand.DefineOutputH\x00R\x0c\x64\x65\x66ineOutput\x12L\n\x0b\x64\x65\x66ine_flow\x18\x03 \x01(\x0b\x32).spark.connect.PipelineCommand.DefineFlowH\x00R\ndefineFlow\x12\x62\n\x13\x64rop_dataflow_graph\x18\x04 \x01(\x0b\x32\x30.spark.connect.PipelineCommand.DropDataflowGraphH\x00R\x11\x64ropDataflowGraph\x12\x46\n\tstart_run\x18\x05 \x01(\x0b\x32\'.spark.connect.PipelineCommand.StartRunH\x00R\x08startRun\x12r\n\x19\x64\x65\x66ine_sql_graph_elements\x18\x06 \x01(\x0b\x32\x35.spark.connect.PipelineCommand.DefineSqlGraphElementsH\x00R\x16\x64\x65\x66ineSqlGraphElements\x12\xa1\x01\n*get_query_function_execution_signal_stream\x18\x07 \x01(\x0b\x32\x44.spark.connect.PipelineCommand.GetQueryFunctionExecutionSignalStreamH\x00R%getQueryFunctionExecutionSignalStream\x12\x88\x01\n!define_flow_query_function_result\x18\x08 \x01(\x0b\x32<.spark.connect.PipelineCommand.DefineFlowQueryFunctionResultH\x00R\x1d\x64\x65\x66ineFlowQueryFunctionResult\x12\x65\n\x14\x65xecute_output_flows\x18\t \x01(\x0b\x32\x31.spark.connect.PipelineCommand.ExecuteOutputFlowsH\x00R\x12\x65xecuteOutputFlows\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\xb4\x02\n\x13\x43reateDataflowGraph\x12,\n\x0f\x64\x65\x66\x61ult_catalog\x18\x01 \x01(\tH\x00R\x0e\x64\x65\x66\x61ultCatalog\x88\x01\x01\x12.\n\x10\x64\x65\x66\x61ult_database\x18\x02 \x01(\tH\x01R\x0f\x64\x65\x66\x61ultDatabase\x88\x01\x01\x12Z\n\x08sql_conf\x18\x05 \x03(\x0b\x32?.spark.connect.PipelineCommand.CreateDataflowGraph.SqlConfEntryR\x07sqlConf\x1a:\n\x0cSqlConfEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x12\n\x10_default_catalogB\x13\n\x11_default_database\x1aZ\n\x11\x44ropDataflowGraph\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\x92\n\n\x0c\x44\x65\x66ineOutput\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12$\n\x0boutput_name\x18\x02 \x01(\tH\x02R\noutputName\x88\x01\x01\x12?\n\x0boutput_type\x18\x03 \x01(\x0e\x32\x19.spark.connect.OutputTypeH\x03R\noutputType\x88\x01\x01\x12\x1d\n\x07\x63omment\x18\x04 \x01(\tH\x04R\x07\x63omment\x88\x01\x01\x12X\n\x14source_code_location\x18\x05 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05R\x12sourceCodeLocation\x88\x01\x01\x12_\n\rtable_details\x18\x06 \x01(\x0b\x32\x38.spark.connect.PipelineCommand.DefineOutput.TableDetailsH\x00R\x0ctableDetails\x12\\\n\x0csink_details\x18\x07 \x01(\x0b\x32\x37.spark.connect.PipelineCommand.DefineOutput.SinkDetailsH\x00R\x0bsinkDetails\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\xc0\x03\n\x0cTableDetails\x12x\n\x10table_properties\x18\x01 \x03(\x0b\x32M.spark.connect.PipelineCommand.DefineOutput.TableDetails.TablePropertiesEntryR\x0ftableProperties\x12%\n\x0epartition_cols\x18\x02 \x03(\tR\rpartitionCols\x12\x1b\n\x06\x66ormat\x18\x03 \x01(\tH\x01R\x06\x66ormat\x88\x01\x01\x12\x43\n\x10schema_data_type\x18\x04 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x0eschemaDataType\x12%\n\rschema_string\x18\x05 \x01(\tH\x00R\x0cschemaString\x12-\n\x12\x63lustering_columns\x18\x06 \x03(\tR\x11\x63lusteringColumns\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x08\n\x06schemaB\t\n\x07_format\x1a\xd1\x01\n\x0bSinkDetails\x12^\n\x07options\x18\x01 \x03(\x0b\x32\x44.spark.connect.PipelineCommand.DefineOutput.SinkDetails.OptionsEntryR\x07options\x12\x1b\n\x06\x66ormat\x18\x02 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0e\n\x0c_output_nameB\x0e\n\x0c_output_typeB\n\n\x08_commentB\x17\n\x15_source_code_location\x1a\xff\x06\n\nDefineFlow\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12 \n\tflow_name\x18\x02 \x01(\tH\x02R\x08\x66lowName\x88\x01\x01\x12\x33\n\x13target_dataset_name\x18\x03 \x01(\tH\x03R\x11targetDatasetName\x88\x01\x01\x12Q\n\x08sql_conf\x18\x04 \x03(\x0b\x32\x36.spark.connect.PipelineCommand.DefineFlow.SqlConfEntryR\x07sqlConf\x12 \n\tclient_id\x18\x05 \x01(\tH\x04R\x08\x63lientId\x88\x01\x01\x12X\n\x14source_code_location\x18\x06 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05R\x12sourceCodeLocation\x88\x01\x01\x12x\n\x15relation_flow_details\x18\x07 \x01(\x0b\x32\x42.spark.connect.PipelineCommand.DefineFlow.WriteRelationFlowDetailsH\x00R\x13relationFlowDetails\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x17\n\x04once\x18\x08 \x01(\x08H\x06R\x04once\x88\x01\x01\x1a:\n\x0cSqlConfEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x61\n\x18WriteRelationFlowDetails\x12\x38\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x08relation\x88\x01\x01\x42\x0b\n\t_relation\x1a:\n\x08Response\x12 \n\tflow_name\x18\x01 \x01(\tH\x00R\x08\x66lowName\x88\x01\x01\x42\x0c\n\n_flow_nameB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0c\n\n_flow_nameB\x16\n\x14_target_dataset_nameB\x0c\n\n_client_idB\x17\n\x15_source_code_locationB\x07\n\x05_once\x1a\xe4\x02\n\x12\x45xecuteOutputFlows\x12U\n\rdefine_output\x18\x01 \x01(\x0b\x32+.spark.connect.PipelineCommand.DefineOutputH\x00R\x0c\x64\x65\x66ineOutput\x88\x01\x01\x12L\n\x0c\x64\x65\x66ine_flows\x18\x02 \x03(\x0b\x32).spark.connect.PipelineCommand.DefineFlowR\x0b\x64\x65\x66ineFlows\x12&\n\x0c\x66ull_refresh\x18\x03 \x01(\x08H\x01R\x0b\x66ullRefresh\x88\x01\x01\x12\x1d\n\x07storage\x18\x04 \x01(\tH\x02R\x07storage\x88\x01\x01\x12\x33\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\textensionB\x10\n\x0e_define_outputB\x0f\n\r_full_refreshB\n\n\x08_storage\x1a\xc2\x02\n\x08StartRun\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\x34\n\x16\x66ull_refresh_selection\x18\x02 \x03(\tR\x14\x66ullRefreshSelection\x12-\n\x10\x66ull_refresh_all\x18\x03 \x01(\x08H\x01R\x0e\x66ullRefreshAll\x88\x01\x01\x12+\n\x11refresh_selection\x18\x04 \x03(\tR\x10refreshSelection\x12\x15\n\x03\x64ry\x18\x05 \x01(\x08H\x02R\x03\x64ry\x88\x01\x01\x12\x1d\n\x07storage\x18\x06 \x01(\tH\x03R\x07storage\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x13\n\x11_full_refresh_allB\x06\n\x04_dryB\n\n\x08_storage\x1a\xc7\x01\n\x16\x44\x65\x66ineSqlGraphElements\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\'\n\rsql_file_path\x18\x02 \x01(\tH\x01R\x0bsqlFilePath\x88\x01\x01\x12\x1e\n\x08sql_text\x18\x03 \x01(\tH\x02R\x07sqlText\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x10\n\x0e_sql_file_pathB\x0b\n\t_sql_text\x1a\x9e\x01\n%GetQueryFunctionExecutionSignalStream\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12 \n\tclient_id\x18\x02 \x01(\tH\x01R\x08\x63lientId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x0c\n\n_client_id\x1a\xc6\x02\n\x1d\x44\x65\x66ineFlowQueryFunctionResult\x12$\n\tflow_name\x18\x01 \x01(\tB\x02\x18\x01H\x00R\x08\x66lowName\x88\x01\x01\x12O\n\x0f\x66low_identifier\x18\x04 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x01R\x0e\x66lowIdentifier\x88\x01\x01\x12/\n\x11\x64\x61taflow_graph_id\x18\x02 \x01(\tH\x02R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\x38\n\x08relation\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x03R\x08relation\x88\x01\x01\x42\x0c\n\n_flow_nameB\x12\n\x10_flow_identifierB\x14\n\x12_dataflow_graph_idB\x0b\n\t_relationB\x0e\n\x0c\x63ommand_type"\xf0\x05\n\x15PipelineCommandResult\x12\x81\x01\n\x1c\x63reate_dataflow_graph_result\x18\x01 \x01(\x0b\x32>.spark.connect.PipelineCommandResult.CreateDataflowGraphResultH\x00R\x19\x63reateDataflowGraphResult\x12k\n\x14\x64\x65\x66ine_output_result\x18\x02 \x01(\x0b\x32\x37.spark.connect.PipelineCommandResult.DefineOutputResultH\x00R\x12\x64\x65\x66ineOutputResult\x12\x65\n\x12\x64\x65\x66ine_flow_result\x18\x03 \x01(\x0b\x32\x35.spark.connect.PipelineCommandResult.DefineFlowResultH\x00R\x10\x64\x65\x66ineFlowResult\x1a\x62\n\x19\x43reateDataflowGraphResult\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\x85\x01\n\x12\x44\x65\x66ineOutputResult\x12W\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00R\x12resolvedIdentifier\x88\x01\x01\x42\x16\n\x14_resolved_identifier\x1a\x83\x01\n\x10\x44\x65\x66ineFlowResult\x12W\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00R\x12resolvedIdentifier\x88\x01\x01\x42\x16\n\x14_resolved_identifierB\r\n\x0bresult_type"I\n\x13PipelineEventResult\x12\x32\n\x05\x65vent\x18\x01 \x01(\x0b\x32\x1c.spark.connect.PipelineEventR\x05\x65vent"t\n\rPipelineEvent\x12\x38\n\ttimestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\ttimestamp\x12\x1d\n\x07message\x18\x02 \x01(\tH\x00R\x07message\x88\x01\x01\x42\n\n\x08_message"\xf1\x01\n\x12SourceCodeLocation\x12 \n\tfile_name\x18\x01 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12$\n\x0bline_number\x18\x02 \x01(\x05H\x01R\nlineNumber\x88\x01\x01\x12,\n\x0f\x64\x65\x66inition_path\x18\x03 \x01(\tH\x02R\x0e\x64\x65\x66initionPath\x88\x01\x01\x12\x33\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\textensionB\x0c\n\n_file_nameB\x0e\n\x0c_line_numberB\x12\n\x10_definition_path"\x97\x01\n$PipelineQueryFunctionExecutionSignal\x12!\n\nflow_names\x18\x01 \x03(\tB\x02\x18\x01R\tflowNames\x12L\n\x10\x66low_identifiers\x18\x02 \x03(\x0b\x32!.spark.connect.ResolvedIdentifierR\x0f\x66lowIdentifiers"\xf0\x02\n\x17PipelineAnalysisContext\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12,\n\x0f\x64\x65\x66inition_path\x18\x02 \x01(\tH\x01R\x0e\x64\x65\x66initionPath\x88\x01\x01\x12$\n\tflow_name\x18\x03 \x01(\tB\x02\x18\x01H\x02R\x08\x66lowName\x88\x01\x01\x12O\n\x0f\x66low_identifier\x18\x04 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x03R\x0e\x66lowIdentifier\x88\x01\x01\x12\x33\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\textensionB\x14\n\x12_dataflow_graph_idB\x12\n\x10_definition_pathB\x0c\n\n_flow_nameB\x12\n\x10_flow_identifier*i\n\nOutputType\x12\x1b\n\x17OUTPUT_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MATERIALIZED_VIEW\x10\x01\x12\t\n\x05TABLE\x10\x02\x12\x12\n\x0eTEMPORARY_VIEW\x10\x03\x12\x08\n\x04SINK\x10\x04\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3'
+    b'\n\x1dspark/connect/pipelines.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"\x83/\n\x0fPipelineCommand\x12h\n\x15\x63reate_dataflow_graph\x18\x01 \x01(\x0b\x32\x32.spark.connect.PipelineCommand.CreateDataflowGraphH\x00R\x13\x63reateDataflowGraph\x12R\n\rdefine_output\x18\x02 \x01(\x0b\x32+.spark.connect.PipelineCommand.DefineOutputH\x00R\x0c\x64\x65\x66ineOutput\x12L\n\x0b\x64\x65\x66ine_flow\x18\x03 \x01(\x0b\x32).spark.connect.PipelineCommand.DefineFlowH\x00R\ndefineFlow\x12\x62\n\x13\x64rop_dataflow_graph\x18\x04 \x01(\x0b\x32\x30.spark.connect.PipelineCommand.DropDataflowGraphH\x00R\x11\x64ropDataflowGraph\x12\x46\n\tstart_run\x18\x05 \x01(\x0b\x32\'.spark.connect.PipelineCommand.StartRunH\x00R\x08startRun\x12r\n\x19\x64\x65\x66ine_sql_graph_elements\x18\x06 \x01(\x0b\x32\x35.spark.connect.PipelineCommand.DefineSqlGraphElementsH\x00R\x16\x64\x65\x66ineSqlGraphElements\x12\xa1\x01\n*get_query_function_execution_signal_stream\x18\x07 \x01(\x0b\x32\x44.spark.connect.PipelineCommand.GetQueryFunctionExecutionSignalStreamH\x00R%getQueryFunctionExecutionSignalStream\x12\x88\x01\n!define_flow_query_function_result\x18\x08 \x01(\x0b\x32<.spark.connect.PipelineCommand.DefineFlowQueryFunctionResultH\x00R\x1d\x64\x65\x66ineFlowQueryFunctionResult\x12\x65\n\x14\x65xecute_output_flows\x18\t \x01(\x0b\x32\x31.spark.connect.PipelineCommand.ExecuteOutputFlowsH\x00R\x12\x65xecuteOutputFlows\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\xb4\x02\n\x13\x43reateDataflowGraph\x12,\n\x0f\x64\x65\x66\x61ult_catalog\x18\x01 \x01(\tH\x00R\x0e\x64\x65\x66\x61ultCatalog\x88\x01\x01\x12.\n\x10\x64\x65\x66\x61ult_database\x18\x02 \x01(\tH\x01R\x0f\x64\x65\x66\x61ultDatabase\x88\x01\x01\x12Z\n\x08sql_conf\x18\x05 \x03(\x0b\x32?.spark.connect.PipelineCommand.CreateDataflowGraph.SqlConfEntryR\x07sqlConf\x1a:\n\x0cSqlConfEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x12\n\x10_default_catalogB\x13\n\x11_default_database\x1aZ\n\x11\x44ropDataflowGraph\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\x92\n\n\x0c\x44\x65\x66ineOutput\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12$\n\x0boutput_name\x18\x02 \x01(\tH\x02R\noutputName\x88\x01\x01\x12?\n\x0boutput_type\x18\x03 \x01(\x0e\x32\x19.spark.connect.OutputTypeH\x03R\noutputType\x88\x01\x01\x12\x1d\n\x07\x63omment\x18\x04 \x01(\tH\x04R\x07\x63omment\x88\x01\x01\x12X\n\x14source_code_location\x18\x05 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05R\x12sourceCodeLocation\x88\x01\x01\x12_\n\rtable_details\x18\x06 \x01(\x0b\x32\x38.spark.connect.PipelineCommand.DefineOutput.TableDetailsH\x00R\x0ctableDetails\x12\\\n\x0csink_details\x18\x07 \x01(\x0b\x32\x37.spark.connect.PipelineCommand.DefineOutput.SinkDetailsH\x00R\x0bsinkDetails\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\xc0\x03\n\x0cTableDetails\x12x\n\x10table_properties\x18\x01 \x03(\x0b\x32M.spark.connect.PipelineCommand.DefineOutput.TableDetails.TablePropertiesEntryR\x0ftableProperties\x12%\n\x0epartition_cols\x18\x02 \x03(\tR\rpartitionCols\x12\x1b\n\x06\x66ormat\x18\x03 \x01(\tH\x01R\x06\x66ormat\x88\x01\x01\x12\x43\n\x10schema_data_type\x18\x04 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x0eschemaDataType\x12%\n\rschema_string\x18\x05 \x01(\tH\x00R\x0cschemaString\x12-\n\x12\x63lustering_columns\x18\x06 \x03(\tR\x11\x63lusteringColumns\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x08\n\x06schemaB\t\n\x07_format\x1a\xd1\x01\n\x0bSinkDetails\x12^\n\x07options\x18\x01 \x03(\x0b\x32\x44.spark.connect.PipelineCommand.DefineOutput.SinkDetails.OptionsEntryR\x07options\x12\x1b\n\x06\x66ormat\x18\x02 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0e\n\x0c_output_nameB\x0e\n\x0c_output_typeB\n\n\x08_commentB\x17\n\x15_source_code_location\x1a\xde\x0e\n\nDefineFlow\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x01R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12 \n\tflow_name\x18\x02 \x01(\tH\x02R\x08\x66lowName\x88\x01\x01\x12\x33\n\x13target_dataset_name\x18\x03 \x01(\tH\x03R\x11targetDatasetName\x88\x01\x01\x12Q\n\x08sql_conf\x18\x04 \x03(\x0b\x32\x36.spark.connect.PipelineCommand.DefineFlow.SqlConfEntryR\x07sqlConf\x12 \n\tclient_id\x18\x05 \x01(\tH\x04R\x08\x63lientId\x88\x01\x01\x12X\n\x14source_code_location\x18\x06 \x01(\x0b\x32!.spark.connect.SourceCodeLocationH\x05R\x12sourceCodeLocation\x88\x01\x01\x12x\n\x15relation_flow_details\x18\x07 \x01(\x0b\x32\x42.spark.connect.PipelineCommand.DefineFlow.WriteRelationFlowDetailsH\x00R\x13relationFlowDetails\x12q\n\x15\x61uto_cdc_flow_details\x18\n \x01(\x0b\x32<.spark.connect.PipelineCommand.DefineFlow.AutoCdcFlowDetailsH\x00R\x12\x61utoCdcFlowDetails\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x17\n\x04once\x18\x08 \x01(\x08H\x06R\x04once\x88\x01\x01\x1a:\n\x0cSqlConfEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x61\n\x18WriteRelationFlowDetails\x12\x38\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x08relation\x88\x01\x01\x42\x0b\n\t_relation\x1a\xb4\x06\n\x12\x41utoCdcFlowDetails\x12\x1b\n\x06source\x18\x01 \x01(\tH\x00R\x06source\x88\x01\x01\x12-\n\x04keys\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x04keys\x12?\n\x0bsequence_by\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x01R\nsequenceBy\x88\x01\x01\x12H\n\x10\x61pply_as_deletes\x18\x06 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x02R\x0e\x61pplyAsDeletes\x88\x01\x01\x12L\n\x12\x61pply_as_truncates\x18\x07 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x03R\x10\x61pplyAsTruncates\x88\x01\x01\x12:\n\x0b\x63olumn_list\x18\x08 \x03(\x0b\x32\x19.spark.connect.ExpressionR\ncolumnList\x12G\n\x12\x65xcept_column_list\x18\t \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10\x65xceptColumnList\x12^\n\x12stored_as_scd_type\x18\n \x01(\x0e\x32\x31.spark.connect.PipelineCommand.DefineFlow.SCDTypeR\x0fstoredAsScdType\x12_\n\x1fignore_null_updates_column_list\x18\x0e \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1bignoreNullUpdatesColumnList\x12l\n&ignore_null_updates_except_column_list\x18\x0f \x03(\x0b\x32\x19.spark.connect.ExpressionR!ignoreNullUpdatesExceptColumnListB\t\n\x07_sourceB\x0e\n\x0c_sequence_byB\x13\n\x11_apply_as_deletesB\x15\n\x13_apply_as_truncates\x1a:\n\x08Response\x12 \n\tflow_name\x18\x01 \x01(\tH\x00R\x08\x66lowName\x88\x01\x01\x42\x0c\n\n_flow_name"3\n\x07SCDType\x12\x18\n\x14SCD_TYPE_UNSPECIFIED\x10\x00\x12\x0e\n\nSCD_TYPE_1\x10\x01\x42\t\n\x07\x64\x65tailsB\x14\n\x12_dataflow_graph_idB\x0c\n\n_flow_nameB\x16\n\x14_target_dataset_nameB\x0c\n\n_client_idB\x17\n\x15_source_code_locationB\x07\n\x05_once\x1a\xe4\x02\n\x12\x45xecuteOutputFlows\x12U\n\rdefine_output\x18\x01 \x01(\x0b\x32+.spark.connect.PipelineCommand.DefineOutputH\x00R\x0c\x64\x65\x66ineOutput\x88\x01\x01\x12L\n\x0c\x64\x65\x66ine_flows\x18\x02 \x03(\x0b\x32).spark.connect.PipelineCommand.DefineFlowR\x0b\x64\x65\x66ineFlows\x12&\n\x0c\x66ull_refresh\x18\x03 \x01(\x08H\x01R\x0b\x66ullRefresh\x88\x01\x01\x12\x1d\n\x07storage\x18\x04 \x01(\tH\x02R\x07storage\x88\x01\x01\x12\x33\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\textensionB\x10\n\x0e_define_outputB\x0f\n\r_full_refreshB\n\n\x08_storage\x1a\xc2\x02\n\x08StartRun\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\x34\n\x16\x66ull_refresh_selection\x18\x02 \x03(\tR\x14\x66ullRefreshSelection\x12-\n\x10\x66ull_refresh_all\x18\x03 \x01(\x08H\x01R\x0e\x66ullRefreshAll\x88\x01\x01\x12+\n\x11refresh_selection\x18\x04 \x03(\tR\x10refreshSelection\x12\x15\n\x03\x64ry\x18\x05 \x01(\x08H\x02R\x03\x64ry\x88\x01\x01\x12\x1d\n\x07storage\x18\x06 \x01(\tH\x03R\x07storage\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x13\n\x11_full_refresh_allB\x06\n\x04_dryB\n\n\x08_storage\x1a\xc7\x01\n\x16\x44\x65\x66ineSqlGraphElements\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\'\n\rsql_file_path\x18\x02 \x01(\tH\x01R\x0bsqlFilePath\x88\x01\x01\x12\x1e\n\x08sql_text\x18\x03 \x01(\tH\x02R\x07sqlText\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x10\n\x0e_sql_file_pathB\x0b\n\t_sql_text\x1a\x9e\x01\n%GetQueryFunctionExecutionSignalStream\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12 \n\tclient_id\x18\x02 \x01(\tH\x01R\x08\x63lientId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x0c\n\n_client_id\x1a\xc6\x02\n\x1d\x44\x65\x66ineFlowQueryFunctionResult\x12$\n\tflow_name\x18\x01 \x01(\tB\x02\x18\x01H\x00R\x08\x66lowName\x88\x01\x01\x12O\n\x0f\x66low_identifier\x18\x04 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x01R\x0e\x66lowIdentifier\x88\x01\x01\x12/\n\x11\x64\x61taflow_graph_id\x18\x02 \x01(\tH\x02R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\x38\n\x08relation\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x03R\x08relation\x88\x01\x01\x42\x0c\n\n_flow_nameB\x12\n\x10_flow_identifierB\x14\n\x12_dataflow_graph_idB\x0b\n\t_relationB\x0e\n\x0c\x63ommand_type"\xf0\x05\n\x15PipelineCommandResult\x12\x81\x01\n\x1c\x63reate_dataflow_graph_result\x18\x01 \x01(\x0b\x32>.spark.connect.PipelineCommandResult.CreateDataflowGraphResultH\x00R\x19\x63reateDataflowGraphResult\x12k\n\x14\x64\x65\x66ine_output_result\x18\x02 \x01(\x0b\x32\x37.spark.connect.PipelineCommandResult.DefineOutputResultH\x00R\x12\x64\x65\x66ineOutputResult\x12\x65\n\x12\x64\x65\x66ine_flow_result\x18\x03 \x01(\x0b\x32\x35.spark.connect.PipelineCommandResult.DefineFlowResultH\x00R\x10\x64\x65\x66ineFlowResult\x1a\x62\n\x19\x43reateDataflowGraphResult\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\x85\x01\n\x12\x44\x65\x66ineOutputResult\x12W\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00R\x12resolvedIdentifier\x88\x01\x01\x42\x16\n\x14_resolved_identifier\x1a\x83\x01\n\x10\x44\x65\x66ineFlowResult\x12W\n\x13resolved_identifier\x18\x01 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x00R\x12resolvedIdentifier\x88\x01\x01\x42\x16\n\x14_resolved_identifierB\r\n\x0bresult_type"I\n\x13PipelineEventResult\x12\x32\n\x05\x65vent\x18\x01 \x01(\x0b\x32\x1c.spark.connect.PipelineEventR\x05\x65vent"t\n\rPipelineEvent\x12\x38\n\ttimestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\ttimestamp\x12\x1d\n\x07message\x18\x02 \x01(\tH\x00R\x07message\x88\x01\x01\x42\n\n\x08_message"\xf1\x01\n\x12SourceCodeLocation\x12 \n\tfile_name\x18\x01 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12$\n\x0bline_number\x18\x02 \x01(\x05H\x01R\nlineNumber\x88\x01\x01\x12,\n\x0f\x64\x65\x66inition_path\x18\x03 \x01(\tH\x02R\x0e\x64\x65\x66initionPath\x88\x01\x01\x12\x33\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\textensionB\x0c\n\n_file_nameB\x0e\n\x0c_line_numberB\x12\n\x10_definition_path"\x97\x01\n$PipelineQueryFunctionExecutionSignal\x12!\n\nflow_names\x18\x01 \x03(\tB\x02\x18\x01R\tflowNames\x12L\n\x10\x66low_identifiers\x18\x02 \x03(\x0b\x32!.spark.connect.ResolvedIdentifierR\x0f\x66lowIdentifiers"\xf0\x02\n\x17PipelineAnalysisContext\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12,\n\x0f\x64\x65\x66inition_path\x18\x02 \x01(\tH\x01R\x0e\x64\x65\x66initionPath\x88\x01\x01\x12$\n\tflow_name\x18\x03 \x01(\tB\x02\x18\x01H\x02R\x08\x66lowName\x88\x01\x01\x12O\n\x0f\x66low_identifier\x18\x04 \x01(\x0b\x32!.spark.connect.ResolvedIdentifierH\x03R\x0e\x66lowIdentifier\x88\x01\x01\x12\x33\n\textension\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\textensionB\x14\n\x12_dataflow_graph_idB\x12\n\x10_definition_pathB\x0c\n\n_flow_nameB\x12\n\x10_flow_identifier*i\n\nOutputType\x12\x1b\n\x17OUTPUT_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MATERIALIZED_VIEW\x10\x01\x12\t\n\x05TABLE\x10\x02\x12\x12\n\x0eTEMPORARY_VIEW\x10\x03\x12\x08\n\x04SINK\x10\x04\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3'
 )
 
 _globals = globals()
@@ -86,64 +87,68 @@
     _globals["_PIPELINEANALYSISCONTEXT"].fields_by_name[
         "flow_name"
     ]._serialized_options = b"\030\001"
-    _globals["_OUTPUTTYPE"]._serialized_start = 6942
-    _globals["_OUTPUTTYPE"]._serialized_end = 7047
-    _globals["_PIPELINECOMMAND"]._serialized_start = 195
-    _globals["_PIPELINECOMMAND"]._serialized_end = 5223
-    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH"]._serialized_start = 1232
-    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH"]._serialized_end = 1540
-    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_start = 1441
-    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_end = 1499
-    _globals["_PIPELINECOMMAND_DROPDATAFLOWGRAPH"]._serialized_start = 1542
-    _globals["_PIPELINECOMMAND_DROPDATAFLOWGRAPH"]._serialized_end = 1632
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT"]._serialized_start = 1635
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT"]._serialized_end = 2933
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_TABLEDETAILS"]._serialized_start = 2171
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_TABLEDETAILS"]._serialized_end = 2619
+    _globals["_OUTPUTTYPE"]._serialized_start = 7966
+    _globals["_OUTPUTTYPE"]._serialized_end = 8071
+    _globals["_PIPELINECOMMAND"]._serialized_start = 228
+    _globals["_PIPELINECOMMAND"]._serialized_end = 6247
+    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH"]._serialized_start = 1265
+    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH"]._serialized_end = 1573
+    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_start = 1474
+    _globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_end = 1532
+    _globals["_PIPELINECOMMAND_DROPDATAFLOWGRAPH"]._serialized_start = 1575
+    _globals["_PIPELINECOMMAND_DROPDATAFLOWGRAPH"]._serialized_end = 1665
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT"]._serialized_start = 1668
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT"]._serialized_end = 2966
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_TABLEDETAILS"]._serialized_start = 2204
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_TABLEDETAILS"]._serialized_end = 2652
     _globals[
         "_PIPELINECOMMAND_DEFINEOUTPUT_TABLEDETAILS_TABLEPROPERTIESENTRY"
-    ]._serialized_start = 2532
+    ]._serialized_start = 2565
     _globals[
         "_PIPELINECOMMAND_DEFINEOUTPUT_TABLEDETAILS_TABLEPROPERTIESENTRY"
-    ]._serialized_end = 2598
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS"]._serialized_start = 2622
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS"]._serialized_end = 2831
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS_OPTIONSENTRY"]._serialized_start = 2762
-    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS_OPTIONSENTRY"]._serialized_end = 2820
-    _globals["_PIPELINECOMMAND_DEFINEFLOW"]._serialized_start = 2936
-    _globals["_PIPELINECOMMAND_DEFINEFLOW"]._serialized_end = 3831
-    _globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_start = 1441
-    _globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_end = 1499
-    _globals["_PIPELINECOMMAND_DEFINEFLOW_WRITERELATIONFLOWDETAILS"]._serialized_start = 3555
-    _globals["_PIPELINECOMMAND_DEFINEFLOW_WRITERELATIONFLOWDETAILS"]._serialized_end = 3652
-    _globals["_PIPELINECOMMAND_DEFINEFLOW_RESPONSE"]._serialized_start = 3654
-    _globals["_PIPELINECOMMAND_DEFINEFLOW_RESPONSE"]._serialized_end = 3712
-    _globals["_PIPELINECOMMAND_EXECUTEOUTPUTFLOWS"]._serialized_start = 3834
-    _globals["_PIPELINECOMMAND_EXECUTEOUTPUTFLOWS"]._serialized_end = 4190
-    _globals["_PIPELINECOMMAND_STARTRUN"]._serialized_start = 4193
-    _globals["_PIPELINECOMMAND_STARTRUN"]._serialized_end = 4515
-    _globals["_PIPELINECOMMAND_DEFINESQLGRAPHELEMENTS"]._serialized_start = 4518
-    _globals["_PIPELINECOMMAND_DEFINESQLGRAPHELEMENTS"]._serialized_end = 4717
-    _globals["_PIPELINECOMMAND_GETQUERYFUNCTIONEXECUTIONSIGNALSTREAM"]._serialized_start = 4720
-    _globals["_PIPELINECOMMAND_GETQUERYFUNCTIONEXECUTIONSIGNALSTREAM"]._serialized_end = 4878
-    _globals["_PIPELINECOMMAND_DEFINEFLOWQUERYFUNCTIONRESULT"]._serialized_start = 4881
-    _globals["_PIPELINECOMMAND_DEFINEFLOWQUERYFUNCTIONRESULT"]._serialized_end = 5207
-    _globals["_PIPELINECOMMANDRESULT"]._serialized_start = 5226
-    _globals["_PIPELINECOMMANDRESULT"]._serialized_end = 5978
-    _globals["_PIPELINECOMMANDRESULT_CREATEDATAFLOWGRAPHRESULT"]._serialized_start = 5595
-    _globals["_PIPELINECOMMANDRESULT_CREATEDATAFLOWGRAPHRESULT"]._serialized_end = 5693
-    _globals["_PIPELINECOMMANDRESULT_DEFINEOUTPUTRESULT"]._serialized_start = 5696
-    _globals["_PIPELINECOMMANDRESULT_DEFINEOUTPUTRESULT"]._serialized_end = 5829
-    _globals["_PIPELINECOMMANDRESULT_DEFINEFLOWRESULT"]._serialized_start = 5832
-    _globals["_PIPELINECOMMANDRESULT_DEFINEFLOWRESULT"]._serialized_end = 5963
-    _globals["_PIPELINEEVENTRESULT"]._serialized_start = 5980
-    _globals["_PIPELINEEVENTRESULT"]._serialized_end = 6053
-    _globals["_PIPELINEEVENT"]._serialized_start = 6055
-    _globals["_PIPELINEEVENT"]._serialized_end = 6171
-    _globals["_SOURCECODELOCATION"]._serialized_start = 6174
-    _globals["_SOURCECODELOCATION"]._serialized_end = 6415
-    _globals["_PIPELINEQUERYFUNCTIONEXECUTIONSIGNAL"]._serialized_start = 6418
-    _globals["_PIPELINEQUERYFUNCTIONEXECUTIONSIGNAL"]._serialized_end = 6569
-    _globals["_PIPELINEANALYSISCONTEXT"]._serialized_start = 6572
-    _globals["_PIPELINEANALYSISCONTEXT"]._serialized_end = 6940
+    ]._serialized_end = 2631
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS"]._serialized_start = 2655
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS"]._serialized_end = 2864
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS_OPTIONSENTRY"]._serialized_start = 2795
+    _globals["_PIPELINECOMMAND_DEFINEOUTPUT_SINKDETAILS_OPTIONSENTRY"]._serialized_end = 2853
+    _globals["_PIPELINECOMMAND_DEFINEFLOW"]._serialized_start = 2969
+    _globals["_PIPELINECOMMAND_DEFINEFLOW"]._serialized_end = 4855
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_start = 1474
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_end = 1532
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_WRITERELATIONFLOWDETAILS"]._serialized_start = 3703
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_WRITERELATIONFLOWDETAILS"]._serialized_end = 3800
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_AUTOCDCFLOWDETAILS"]._serialized_start = 3803
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_AUTOCDCFLOWDETAILS"]._serialized_end = 4623
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_RESPONSE"]._serialized_start = 4625
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_RESPONSE"]._serialized_end = 4683
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_SCDTYPE"]._serialized_start = 4685
+    _globals["_PIPELINECOMMAND_DEFINEFLOW_SCDTYPE"]._serialized_end = 4736
+    _globals["_PIPELINECOMMAND_EXECUTEOUTPUTFLOWS"]._serialized_start = 4858
+    _globals["_PIPELINECOMMAND_EXECUTEOUTPUTFLOWS"]._serialized_end = 5214
+    _globals["_PIPELINECOMMAND_STARTRUN"]._serialized_start = 5217
+    _globals["_PIPELINECOMMAND_STARTRUN"]._serialized_end = 5539
+    _globals["_PIPELINECOMMAND_DEFINESQLGRAPHELEMENTS"]._serialized_start = 5542
+    _globals["_PIPELINECOMMAND_DEFINESQLGRAPHELEMENTS"]._serialized_end = 5741
+    _globals["_PIPELINECOMMAND_GETQUERYFUNCTIONEXECUTIONSIGNALSTREAM"]._serialized_start = 5744
+    _globals["_PIPELINECOMMAND_GETQUERYFUNCTIONEXECUTIONSIGNALSTREAM"]._serialized_end = 5902
+    _globals["_PIPELINECOMMAND_DEFINEFLOWQUERYFUNCTIONRESULT"]._serialized_start = 5905
+    _globals["_PIPELINECOMMAND_DEFINEFLOWQUERYFUNCTIONRESULT"]._serialized_end = 6231
+    _globals["_PIPELINECOMMANDRESULT"]._serialized_start = 6250
+    _globals["_PIPELINECOMMANDRESULT"]._serialized_end = 7002
+    _globals["_PIPELINECOMMANDRESULT_CREATEDATAFLOWGRAPHRESULT"]._serialized_start = 6619
+    _globals["_PIPELINECOMMANDRESULT_CREATEDATAFLOWGRAPHRESULT"]._serialized_end = 6717
+    _globals["_PIPELINECOMMANDRESULT_DEFINEOUTPUTRESULT"]._serialized_start = 6720
+    _globals["_PIPELINECOMMANDRESULT_DEFINEOUTPUTRESULT"]._serialized_end = 6853
+    _globals["_PIPELINECOMMANDRESULT_DEFINEFLOWRESULT"]._serialized_start = 6856
+    _globals["_PIPELINECOMMANDRESULT_DEFINEFLOWRESULT"]._serialized_end = 6987
+    _globals["_PIPELINEEVENTRESULT"]._serialized_start = 7004
+    _globals["_PIPELINEEVENTRESULT"]._serialized_end = 7077
+    _globals["_PIPELINEEVENT"]._serialized_start = 7079
+    _globals["_PIPELINEEVENT"]._serialized_end = 7195
+    _globals["_SOURCECODELOCATION"]._serialized_start = 7198
+    _globals["_SOURCECODELOCATION"]._serialized_end = 7439
+    _globals["_PIPELINEQUERYFUNCTIONEXECUTIONSIGNAL"]._serialized_start = 7442
+    _globals["_PIPELINEQUERYFUNCTIONEXECUTIONSIGNAL"]._serialized_end = 7593
+    _globals["_PIPELINEANALYSISCONTEXT"]._serialized_start = 7596
+    _globals["_PIPELINEANALYSISCONTEXT"]._serialized_end = 7964
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/pipelines_pb2.pyi b/python/pyspark/sql/connect/proto/pipelines_pb2.pyi
index ee628ddd2419d..f327a7e92ac84 100644
--- a/python/pyspark/sql/connect/proto/pipelines_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/pipelines_pb2.pyi
@@ -43,6 +43,7 @@ import google.protobuf.internal.enum_type_wrapper
 import google.protobuf.message
 import google.protobuf.timestamp_pb2
 import pyspark.sql.connect.proto.common_pb2
+import pyspark.sql.connect.proto.expressions_pb2
 import pyspark.sql.connect.proto.relations_pb2
 import pyspark.sql.connect.proto.types_pb2
 import sys
@@ -505,6 +506,26 @@ class PipelineCommand(google.protobuf.message.Message):
 
         DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
+        class _SCDType:
+            ValueType = typing.NewType("ValueType", builtins.int)
+            V: typing_extensions.TypeAlias = ValueType
+
+        class _SCDTypeEnumTypeWrapper(
+            google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[
+                PipelineCommand.DefineFlow._SCDType.ValueType
+            ],
+            builtins.type,
+        ):  # noqa: F821
+            DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor
+            SCD_TYPE_UNSPECIFIED: PipelineCommand.DefineFlow._SCDType.ValueType  # 0
+            SCD_TYPE_1: PipelineCommand.DefineFlow._SCDType.ValueType  # 1
+
+        class SCDType(_SCDType, metaclass=_SCDTypeEnumTypeWrapper):
+            """SCD Type for Auto CDC target tables."""
+
+        SCD_TYPE_UNSPECIFIED: PipelineCommand.DefineFlow.SCDType.ValueType  # 0
+        SCD_TYPE_1: PipelineCommand.DefineFlow.SCDType.ValueType  # 1
+
         class SqlConfEntry(google.protobuf.message.Message):
             DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
@@ -554,6 +575,172 @@ class PipelineCommand(google.protobuf.message.Message):
                 self, oneof_group: typing_extensions.Literal["_relation", b"_relation"]
             ) -> typing_extensions.Literal["relation"] | None: ...
 
+        class AutoCdcFlowDetails(google.protobuf.message.Message):
+            """Details for Auto CDC flows."""
+
+            DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+            SOURCE_FIELD_NUMBER: builtins.int
+            KEYS_FIELD_NUMBER: builtins.int
+            SEQUENCE_BY_FIELD_NUMBER: builtins.int
+            APPLY_AS_DELETES_FIELD_NUMBER: builtins.int
+            APPLY_AS_TRUNCATES_FIELD_NUMBER: builtins.int
+            COLUMN_LIST_FIELD_NUMBER: builtins.int
+            EXCEPT_COLUMN_LIST_FIELD_NUMBER: builtins.int
+            STORED_AS_SCD_TYPE_FIELD_NUMBER: builtins.int
+            IGNORE_NULL_UPDATES_COLUMN_LIST_FIELD_NUMBER: builtins.int
+            IGNORE_NULL_UPDATES_EXCEPT_COLUMN_LIST_FIELD_NUMBER: builtins.int
+            source: builtins.str
+            """The name of the CDC source to stream from."""
+            @property
+            def keys(
+                self,
+            ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
+                pyspark.sql.connect.proto.expressions_pb2.Expression
+            ]:
+                """Column(s) that uniquely identify a row in source and target data."""
+            @property
+            def sequence_by(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
+                """Expression to order the source data."""
+            @property
+            def apply_as_deletes(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
+                """Delete condition for the merged operation."""
+            @property
+            def apply_as_truncates(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
+                """Truncate condition for the merged operation."""
+            @property
+            def column_list(
+                self,
+            ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
+                pyspark.sql.connect.proto.expressions_pb2.Expression
+            ]:
+                """Columns included in the output table."""
+            @property
+            def except_column_list(
+                self,
+            ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
+                pyspark.sql.connect.proto.expressions_pb2.Expression
+            ]:
+                """Columns excluded from the output table."""
+            stored_as_scd_type: global___PipelineCommand.DefineFlow.SCDType.ValueType
+            """SCD Type for target table."""
+            @property
+            def ignore_null_updates_column_list(
+                self,
+            ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
+                pyspark.sql.connect.proto.expressions_pb2.Expression
+            ]:
+                """Subset of columns to ignore null in updates."""
+            @property
+            def ignore_null_updates_except_column_list(
+                self,
+            ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
+                pyspark.sql.connect.proto.expressions_pb2.Expression
+            ]:
+                """Subset of columns excluded from ignoring null in updates."""
+            def __init__(
+                self,
+                *,
+                source: builtins.str | None = ...,
+                keys: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression]
+                | None = ...,
+                sequence_by: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
+                apply_as_deletes: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
+                apply_as_truncates: pyspark.sql.connect.proto.expressions_pb2.Expression
+                | None = ...,
+                column_list: collections.abc.Iterable[
+                    pyspark.sql.connect.proto.expressions_pb2.Expression
+                ]
+                | None = ...,
+                except_column_list: collections.abc.Iterable[
+                    pyspark.sql.connect.proto.expressions_pb2.Expression
+                ]
+                | None = ...,
+                stored_as_scd_type: global___PipelineCommand.DefineFlow.SCDType.ValueType = ...,
+                ignore_null_updates_column_list: collections.abc.Iterable[
+                    pyspark.sql.connect.proto.expressions_pb2.Expression
+                ]
+                | None = ...,
+                ignore_null_updates_except_column_list: collections.abc.Iterable[
+                    pyspark.sql.connect.proto.expressions_pb2.Expression
+                ]
+                | None = ...,
+            ) -> None: ...
+            def HasField(
+                self,
+                field_name: typing_extensions.Literal[
+                    "_apply_as_deletes",
+                    b"_apply_as_deletes",
+                    "_apply_as_truncates",
+                    b"_apply_as_truncates",
+                    "_sequence_by",
+                    b"_sequence_by",
+                    "_source",
+                    b"_source",
+                    "apply_as_deletes",
+                    b"apply_as_deletes",
+                    "apply_as_truncates",
+                    b"apply_as_truncates",
+                    "sequence_by",
+                    b"sequence_by",
+                    "source",
+                    b"source",
+                ],
+            ) -> builtins.bool: ...
+            def ClearField(
+                self,
+                field_name: typing_extensions.Literal[
+                    "_apply_as_deletes",
+                    b"_apply_as_deletes",
+                    "_apply_as_truncates",
+                    b"_apply_as_truncates",
+                    "_sequence_by",
+                    b"_sequence_by",
+                    "_source",
+                    b"_source",
+                    "apply_as_deletes",
+                    b"apply_as_deletes",
+                    "apply_as_truncates",
+                    b"apply_as_truncates",
+                    "column_list",
+                    b"column_list",
+                    "except_column_list",
+                    b"except_column_list",
+                    "ignore_null_updates_column_list",
+                    b"ignore_null_updates_column_list",
+                    "ignore_null_updates_except_column_list",
+                    b"ignore_null_updates_except_column_list",
+                    "keys",
+                    b"keys",
+                    "sequence_by",
+                    b"sequence_by",
+                    "source",
+                    b"source",
+                    "stored_as_scd_type",
+                    b"stored_as_scd_type",
+                ],
+            ) -> None: ...
+            @typing.overload
+            def WhichOneof(
+                self,
+                oneof_group: typing_extensions.Literal["_apply_as_deletes", b"_apply_as_deletes"],
+            ) -> typing_extensions.Literal["apply_as_deletes"] | None: ...
+            @typing.overload
+            def WhichOneof(
+                self,
+                oneof_group: typing_extensions.Literal[
+                    "_apply_as_truncates", b"_apply_as_truncates"
+                ],
+            ) -> typing_extensions.Literal["apply_as_truncates"] | None: ...
+            @typing.overload
+            def WhichOneof(
+                self, oneof_group: typing_extensions.Literal["_sequence_by", b"_sequence_by"]
+            ) -> typing_extensions.Literal["sequence_by"] | None: ...
+            @typing.overload
+            def WhichOneof(
+                self, oneof_group: typing_extensions.Literal["_source", b"_source"]
+            ) -> typing_extensions.Literal["source"] | None: ...
+
         class Response(google.protobuf.message.Message):
             DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
@@ -588,6 +775,7 @@ class PipelineCommand(google.protobuf.message.Message):
         CLIENT_ID_FIELD_NUMBER: builtins.int
         SOURCE_CODE_LOCATION_FIELD_NUMBER: builtins.int
         RELATION_FLOW_DETAILS_FIELD_NUMBER: builtins.int
+        AUTO_CDC_FLOW_DETAILS_FIELD_NUMBER: builtins.int
         EXTENSION_FIELD_NUMBER: builtins.int
         ONCE_FIELD_NUMBER: builtins.int
         dataflow_graph_id: builtins.str
@@ -613,6 +801,10 @@ class PipelineCommand(google.protobuf.message.Message):
             self,
         ) -> global___PipelineCommand.DefineFlow.WriteRelationFlowDetails: ...
         @property
+        def auto_cdc_flow_details(
+            self,
+        ) -> global___PipelineCommand.DefineFlow.AutoCdcFlowDetails: ...
+        @property
         def extension(self) -> google.protobuf.any_pb2.Any: ...
         once: builtins.bool
         """If true, define the flow as a one-time flow, such as for backfill.
@@ -632,6 +824,8 @@ class PipelineCommand(google.protobuf.message.Message):
             source_code_location: global___SourceCodeLocation | None = ...,
             relation_flow_details: global___PipelineCommand.DefineFlow.WriteRelationFlowDetails
             | None = ...,
+            auto_cdc_flow_details: global___PipelineCommand.DefineFlow.AutoCdcFlowDetails
+            | None = ...,
             extension: google.protobuf.any_pb2.Any | None = ...,
             once: builtins.bool | None = ...,
         ) -> None: ...
@@ -650,6 +844,8 @@ class PipelineCommand(google.protobuf.message.Message):
                 b"_source_code_location",
                 "_target_dataset_name",
                 b"_target_dataset_name",
+                "auto_cdc_flow_details",
+                b"auto_cdc_flow_details",
                 "client_id",
                 b"client_id",
                 "dataflow_graph_id",
@@ -685,6 +881,8 @@ class PipelineCommand(google.protobuf.message.Message):
                 b"_source_code_location",
                 "_target_dataset_name",
                 b"_target_dataset_name",
+                "auto_cdc_flow_details",
+                b"auto_cdc_flow_details",
                 "client_id",
                 b"client_id",
                 "dataflow_graph_id",
@@ -739,7 +937,10 @@ class PipelineCommand(google.protobuf.message.Message):
         @typing.overload
         def WhichOneof(
             self, oneof_group: typing_extensions.Literal["details", b"details"]
-        ) -> typing_extensions.Literal["relation_flow_details", "extension"] | None: ...
+        ) -> (
+            typing_extensions.Literal["relation_flow_details", "auto_cdc_flow_details", "extension"]
+            | None
+        ): ...
 
     class ExecuteOutputFlows(google.protobuf.message.Message):
         """Request to execute all flows for a single output (dataset or sink) remotely."""
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py
index d024c6a07ada8..f63b61fc344ef 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -44,7 +44,7 @@
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto"\xd9\x1e\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18  \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x38\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00R\ttranspose\x12w\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00R\x1dunresolvedTableValuedFunction\x12?\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00R\x0blateralJoin\x12n\n\x1d\x63hunked_cached_local_relation\x18- \x01(\x0b\x32).spark.connect.ChunkedCachedLocalRelationH\x00R\x1a\x63hunkedCachedLocalRelation\x12K\n\x10relation_changes\x18. \x01(\x0b\x32\x1e.spark.connect.RelationChangesH\x00R\x0frelationChanges\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12=\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00R\nmlRelation\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\xe4\x03\n\nMlRelation\x12\x43\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00R\ttransform\x12,\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00R\x05\x66\x65tch\x12P\n\x15model_summary_dataset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x01R\x13modelSummaryDataset\x88\x01\x01\x1a\xeb\x01\n\tTransform\x12\x33\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12=\n\x0btransformer\x18\x02 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00R\x0btransformer\x12-\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06paramsB\n\n\x08operatorB\t\n\x07ml_typeB\x18\n\x16_model_summary_dataset"\xcb\x02\n\x05\x46\x65tch\x12\x31\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefR\x06objRef\x12\x35\n\x07methods\x18\x02 \x03(\x0b\x32\x1b.spark.connect.Fetch.MethodR\x07methods\x1a\xd7\x01\n\x06Method\x12\x16\n\x06method\x18\x01 \x01(\tR\x06method\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.ArgsR\x04\x61rgs\x1a\x7f\n\x04\x41rgs\x12\x39\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x05param\x12/\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x05inputB\x0b\n\targs_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\xcd\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\xcb\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x12$\n\x0bsource_name\x18\x06 \x01(\tH\x02R\nsourceName\x88\x01\x01\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0e\n\x0c_source_nameB\x0b\n\tread_type"\xe8\x01\n\x0fRelationChanges\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.RelationChanges.OptionsEntryR\x07options\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"p\n\x1a\x43hunkedCachedLocalRelation\x12\x1e\n\ndataHashes\x18\x01 \x03(\tR\ndataHashes\x12#\n\nschemaHash\x18\x02 \x01(\tH\x00R\nschemaHash\x88\x01\x01\x42\r\n\x0b_schemaHash"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"z\n\tTranspose\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cindexColumns"}\n\x1dUnresolvedTableValuedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xd2\x06\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x12?\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x0bstateSchema\x88\x01\x01\x12\x65\n\x19transform_with_state_info\x18\x0b \x01(\x0b\x32%.spark.connect.TransformWithStateInfoH\x04R\x16transformWithStateInfo\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schemaB\x1c\n\x1a_transform_with_state_info"\xdf\x01\n\x16TransformWithStateInfo\x12\x1b\n\ttime_mode\x18\x01 \x01(\tR\x08timeMode\x12\x38\n\x16\x65vent_time_column_name\x18\x02 \x01(\tH\x00R\x13\x65ventTimeColumnName\x88\x01\x01\x12\x41\n\routput_schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x01R\x0coutputSchema\x88\x01\x01\x42\x19\n\x17_event_time_column_nameB\x10\n\x0e_output_schema"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x9a\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"n\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x12\x14\n\x10PARSE_FORMAT_XML\x10\x03\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirection"\xe6\x01\n\x0bLateralJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinTypeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3'
+    b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto"\xa1\x1f\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18  \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x38\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00R\ttranspose\x12w\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00R\x1dunresolvedTableValuedFunction\x12?\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00R\x0blateralJoin\x12n\n\x1d\x63hunked_cached_local_relation\x18- \x01(\x0b\x32).spark.connect.ChunkedCachedLocalRelationH\x00R\x1a\x63hunkedCachedLocalRelation\x12K\n\x10relation_changes\x18. \x01(\x0b\x32\x1e.spark.connect.RelationChangesH\x00R\x0frelationChanges\x12\x46\n\x0fnearest_by_join\x18/ \x01(\x0b\x32\x1c.spark.connect.NearestByJoinH\x00R\rnearestByJoin\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12=\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00R\nmlRelation\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\xe4\x03\n\nMlRelation\x12\x43\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00R\ttransform\x12,\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00R\x05\x66\x65tch\x12P\n\x15model_summary_dataset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x01R\x13modelSummaryDataset\x88\x01\x01\x1a\xeb\x01\n\tTransform\x12\x33\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12=\n\x0btransformer\x18\x02 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00R\x0btransformer\x12-\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06paramsB\n\n\x08operatorB\t\n\x07ml_typeB\x18\n\x16_model_summary_dataset"\xcb\x02\n\x05\x46\x65tch\x12\x31\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefR\x06objRef\x12\x35\n\x07methods\x18\x02 \x03(\x0b\x32\x1b.spark.connect.Fetch.MethodR\x07methods\x1a\xd7\x01\n\x06Method\x12\x16\n\x06method\x18\x01 \x01(\tR\x06method\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.ArgsR\x04\x61rgs\x1a\x7f\n\x04\x41rgs\x12\x39\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x05param\x12/\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x05inputB\x0b\n\targs_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\xcd\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\xcb\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x12$\n\x0bsource_name\x18\x06 \x01(\tH\x02R\nsourceName\x88\x01\x01\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0e\n\x0c_source_nameB\x0b\n\tread_type"\xe8\x01\n\x0fRelationChanges\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.RelationChanges.OptionsEntryR\x07options\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"p\n\x1a\x43hunkedCachedLocalRelation\x12\x1e\n\ndataHashes\x18\x01 \x03(\tR\ndataHashes\x12#\n\nschemaHash\x18\x02 \x01(\tH\x00R\nschemaHash\x88\x01\x01\x42\r\n\x0b_schemaHash"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"z\n\tTranspose\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cindexColumns"}\n\x1dUnresolvedTableValuedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xd2\x06\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x12?\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x0bstateSchema\x88\x01\x01\x12\x65\n\x19transform_with_state_info\x18\x0b \x01(\x0b\x32%.spark.connect.TransformWithStateInfoH\x04R\x16transformWithStateInfo\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schemaB\x1c\n\x1a_transform_with_state_info"\xdf\x01\n\x16TransformWithStateInfo\x12\x1b\n\ttime_mode\x18\x01 \x01(\tR\x08timeMode\x12\x38\n\x16\x65vent_time_column_name\x18\x02 \x01(\tH\x00R\x13\x65ventTimeColumnName\x88\x01\x01\x12\x41\n\routput_schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x01R\x0coutputSchema\x88\x01\x01\x42\x19\n\x17_event_time_column_nameB\x10\n\x0e_output_schema"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x9a\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"n\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x12\x14\n\x10PARSE_FORMAT_XML\x10\x03\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirection"\xe6\x01\n\x0bLateralJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType"\xa5\x02\n\rNearestByJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12H\n\x12ranking_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x11rankingExpression\x12\x1f\n\x0bnum_results\x18\x04 \x01(\x05R\nnumResults\x12\x1b\n\tjoin_type\x18\x05 \x01(\tR\x08joinType\x12\x12\n\x04mode\x18\x06 \x01(\tR\x04mode\x12\x1c\n\tdirection\x18\x07 \x01(\tR\tdirectionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3'
 )
 
 _globals = globals()
@@ -82,177 +82,179 @@
     _globals["_PARSE_OPTIONSENTRY"]._loaded_options = None
     _globals["_PARSE_OPTIONSENTRY"]._serialized_options = b"8\001"
     _globals["_RELATION"]._serialized_start = 224
-    _globals["_RELATION"]._serialized_end = 4153
-    _globals["_MLRELATION"]._serialized_start = 4156
-    _globals["_MLRELATION"]._serialized_end = 4640
-    _globals["_MLRELATION_TRANSFORM"]._serialized_start = 4368
-    _globals["_MLRELATION_TRANSFORM"]._serialized_end = 4603
-    _globals["_FETCH"]._serialized_start = 4643
-    _globals["_FETCH"]._serialized_end = 4974
-    _globals["_FETCH_METHOD"]._serialized_start = 4759
-    _globals["_FETCH_METHOD"]._serialized_end = 4974
-    _globals["_FETCH_METHOD_ARGS"]._serialized_start = 4847
-    _globals["_FETCH_METHOD_ARGS"]._serialized_end = 4974
-    _globals["_UNKNOWN"]._serialized_start = 4976
-    _globals["_UNKNOWN"]._serialized_end = 4985
-    _globals["_RELATIONCOMMON"]._serialized_start = 4988
-    _globals["_RELATIONCOMMON"]._serialized_end = 5130
-    _globals["_SQL"]._serialized_start = 5133
-    _globals["_SQL"]._serialized_end = 5611
-    _globals["_SQL_ARGSENTRY"]._serialized_start = 5427
-    _globals["_SQL_ARGSENTRY"]._serialized_end = 5517
-    _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_start = 5519
-    _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_end = 5611
-    _globals["_WITHRELATIONS"]._serialized_start = 5613
-    _globals["_WITHRELATIONS"]._serialized_end = 5730
-    _globals["_READ"]._serialized_start = 5733
-    _globals["_READ"]._serialized_end = 6450
-    _globals["_READ_NAMEDTABLE"]._serialized_start = 5911
-    _globals["_READ_NAMEDTABLE"]._serialized_end = 6103
-    _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_start = 6045
-    _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_end = 6103
-    _globals["_READ_DATASOURCE"]._serialized_start = 6106
-    _globals["_READ_DATASOURCE"]._serialized_end = 6437
-    _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_start = 6045
-    _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_end = 6103
-    _globals["_RELATIONCHANGES"]._serialized_start = 6453
-    _globals["_RELATIONCHANGES"]._serialized_end = 6685
-    _globals["_RELATIONCHANGES_OPTIONSENTRY"]._serialized_start = 6045
-    _globals["_RELATIONCHANGES_OPTIONSENTRY"]._serialized_end = 6103
-    _globals["_PROJECT"]._serialized_start = 6687
-    _globals["_PROJECT"]._serialized_end = 6804
-    _globals["_FILTER"]._serialized_start = 6806
-    _globals["_FILTER"]._serialized_end = 6918
-    _globals["_JOIN"]._serialized_start = 6921
-    _globals["_JOIN"]._serialized_end = 7582
-    _globals["_JOIN_JOINDATATYPE"]._serialized_start = 7260
-    _globals["_JOIN_JOINDATATYPE"]._serialized_end = 7352
-    _globals["_JOIN_JOINTYPE"]._serialized_start = 7355
-    _globals["_JOIN_JOINTYPE"]._serialized_end = 7563
-    _globals["_SETOPERATION"]._serialized_start = 7585
-    _globals["_SETOPERATION"]._serialized_end = 8064
-    _globals["_SETOPERATION_SETOPTYPE"]._serialized_start = 7901
-    _globals["_SETOPERATION_SETOPTYPE"]._serialized_end = 8015
-    _globals["_LIMIT"]._serialized_start = 8066
-    _globals["_LIMIT"]._serialized_end = 8142
-    _globals["_OFFSET"]._serialized_start = 8144
-    _globals["_OFFSET"]._serialized_end = 8223
-    _globals["_TAIL"]._serialized_start = 8225
-    _globals["_TAIL"]._serialized_end = 8300
-    _globals["_AGGREGATE"]._serialized_start = 8303
-    _globals["_AGGREGATE"]._serialized_end = 9069
-    _globals["_AGGREGATE_PIVOT"]._serialized_start = 8718
-    _globals["_AGGREGATE_PIVOT"]._serialized_end = 8829
-    _globals["_AGGREGATE_GROUPINGSETS"]._serialized_start = 8831
-    _globals["_AGGREGATE_GROUPINGSETS"]._serialized_end = 8907
-    _globals["_AGGREGATE_GROUPTYPE"]._serialized_start = 8910
-    _globals["_AGGREGATE_GROUPTYPE"]._serialized_end = 9069
-    _globals["_SORT"]._serialized_start = 9072
-    _globals["_SORT"]._serialized_end = 9232
-    _globals["_DROP"]._serialized_start = 9235
-    _globals["_DROP"]._serialized_end = 9376
-    _globals["_DEDUPLICATE"]._serialized_start = 9379
-    _globals["_DEDUPLICATE"]._serialized_end = 9619
-    _globals["_LOCALRELATION"]._serialized_start = 9621
-    _globals["_LOCALRELATION"]._serialized_end = 9710
-    _globals["_CACHEDLOCALRELATION"]._serialized_start = 9712
-    _globals["_CACHEDLOCALRELATION"]._serialized_end = 9784
-    _globals["_CHUNKEDCACHEDLOCALRELATION"]._serialized_start = 9786
-    _globals["_CHUNKEDCACHEDLOCALRELATION"]._serialized_end = 9898
-    _globals["_CACHEDREMOTERELATION"]._serialized_start = 9900
-    _globals["_CACHEDREMOTERELATION"]._serialized_end = 9955
-    _globals["_SAMPLE"]._serialized_start = 9958
-    _globals["_SAMPLE"]._serialized_end = 10231
-    _globals["_RANGE"]._serialized_start = 10234
-    _globals["_RANGE"]._serialized_end = 10379
-    _globals["_SUBQUERYALIAS"]._serialized_start = 10381
-    _globals["_SUBQUERYALIAS"]._serialized_end = 10495
-    _globals["_REPARTITION"]._serialized_start = 10498
-    _globals["_REPARTITION"]._serialized_end = 10640
-    _globals["_SHOWSTRING"]._serialized_start = 10643
-    _globals["_SHOWSTRING"]._serialized_end = 10785
-    _globals["_HTMLSTRING"]._serialized_start = 10787
-    _globals["_HTMLSTRING"]._serialized_end = 10901
-    _globals["_STATSUMMARY"]._serialized_start = 10903
-    _globals["_STATSUMMARY"]._serialized_end = 10995
-    _globals["_STATDESCRIBE"]._serialized_start = 10997
-    _globals["_STATDESCRIBE"]._serialized_end = 11078
-    _globals["_STATCROSSTAB"]._serialized_start = 11080
-    _globals["_STATCROSSTAB"]._serialized_end = 11181
-    _globals["_STATCOV"]._serialized_start = 11183
-    _globals["_STATCOV"]._serialized_end = 11279
-    _globals["_STATCORR"]._serialized_start = 11282
-    _globals["_STATCORR"]._serialized_end = 11419
-    _globals["_STATAPPROXQUANTILE"]._serialized_start = 11422
-    _globals["_STATAPPROXQUANTILE"]._serialized_end = 11586
-    _globals["_STATFREQITEMS"]._serialized_start = 11588
-    _globals["_STATFREQITEMS"]._serialized_end = 11713
-    _globals["_STATSAMPLEBY"]._serialized_start = 11716
-    _globals["_STATSAMPLEBY"]._serialized_end = 12025
-    _globals["_STATSAMPLEBY_FRACTION"]._serialized_start = 11917
-    _globals["_STATSAMPLEBY_FRACTION"]._serialized_end = 12016
-    _globals["_NAFILL"]._serialized_start = 12028
-    _globals["_NAFILL"]._serialized_end = 12162
-    _globals["_NADROP"]._serialized_start = 12165
-    _globals["_NADROP"]._serialized_end = 12299
-    _globals["_NAREPLACE"]._serialized_start = 12302
-    _globals["_NAREPLACE"]._serialized_end = 12598
-    _globals["_NAREPLACE_REPLACEMENT"]._serialized_start = 12457
-    _globals["_NAREPLACE_REPLACEMENT"]._serialized_end = 12598
-    _globals["_TODF"]._serialized_start = 12600
-    _globals["_TODF"]._serialized_end = 12688
-    _globals["_WITHCOLUMNSRENAMED"]._serialized_start = 12691
-    _globals["_WITHCOLUMNSRENAMED"]._serialized_end = 13073
-    _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_start = 12935
-    _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_end = 13002
-    _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_start = 13004
-    _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_end = 13073
-    _globals["_WITHCOLUMNS"]._serialized_start = 13075
-    _globals["_WITHCOLUMNS"]._serialized_end = 13194
-    _globals["_WITHWATERMARK"]._serialized_start = 13197
-    _globals["_WITHWATERMARK"]._serialized_end = 13331
-    _globals["_HINT"]._serialized_start = 13334
-    _globals["_HINT"]._serialized_end = 13466
-    _globals["_UNPIVOT"]._serialized_start = 13469
-    _globals["_UNPIVOT"]._serialized_end = 13796
-    _globals["_UNPIVOT_VALUES"]._serialized_start = 13726
-    _globals["_UNPIVOT_VALUES"]._serialized_end = 13785
-    _globals["_TRANSPOSE"]._serialized_start = 13798
-    _globals["_TRANSPOSE"]._serialized_end = 13920
-    _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_start = 13922
-    _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_end = 14047
-    _globals["_TOSCHEMA"]._serialized_start = 14049
-    _globals["_TOSCHEMA"]._serialized_end = 14155
-    _globals["_REPARTITIONBYEXPRESSION"]._serialized_start = 14158
-    _globals["_REPARTITIONBYEXPRESSION"]._serialized_end = 14361
-    _globals["_MAPPARTITIONS"]._serialized_start = 14364
-    _globals["_MAPPARTITIONS"]._serialized_end = 14596
-    _globals["_GROUPMAP"]._serialized_start = 14599
-    _globals["_GROUPMAP"]._serialized_end = 15449
-    _globals["_TRANSFORMWITHSTATEINFO"]._serialized_start = 15452
-    _globals["_TRANSFORMWITHSTATEINFO"]._serialized_end = 15675
-    _globals["_COGROUPMAP"]._serialized_start = 15678
-    _globals["_COGROUPMAP"]._serialized_end = 16204
-    _globals["_APPLYINPANDASWITHSTATE"]._serialized_start = 16207
-    _globals["_APPLYINPANDASWITHSTATE"]._serialized_end = 16564
-    _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_start = 16567
-    _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_end = 16811
-    _globals["_PYTHONUDTF"]._serialized_start = 16814
-    _globals["_PYTHONUDTF"]._serialized_end = 16991
-    _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_start = 16994
-    _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_end = 17145
-    _globals["_PYTHONDATASOURCE"]._serialized_start = 17147
-    _globals["_PYTHONDATASOURCE"]._serialized_end = 17222
-    _globals["_COLLECTMETRICS"]._serialized_start = 17225
-    _globals["_COLLECTMETRICS"]._serialized_end = 17361
-    _globals["_PARSE"]._serialized_start = 17364
-    _globals["_PARSE"]._serialized_end = 17774
-    _globals["_PARSE_OPTIONSENTRY"]._serialized_start = 6045
-    _globals["_PARSE_OPTIONSENTRY"]._serialized_end = 6103
-    _globals["_PARSE_PARSEFORMAT"]._serialized_start = 17653
-    _globals["_PARSE_PARSEFORMAT"]._serialized_end = 17763
-    _globals["_ASOFJOIN"]._serialized_start = 17777
-    _globals["_ASOFJOIN"]._serialized_end = 18252
-    _globals["_LATERALJOIN"]._serialized_start = 18255
-    _globals["_LATERALJOIN"]._serialized_end = 18485
+    _globals["_RELATION"]._serialized_end = 4225
+    _globals["_MLRELATION"]._serialized_start = 4228
+    _globals["_MLRELATION"]._serialized_end = 4712
+    _globals["_MLRELATION_TRANSFORM"]._serialized_start = 4440
+    _globals["_MLRELATION_TRANSFORM"]._serialized_end = 4675
+    _globals["_FETCH"]._serialized_start = 4715
+    _globals["_FETCH"]._serialized_end = 5046
+    _globals["_FETCH_METHOD"]._serialized_start = 4831
+    _globals["_FETCH_METHOD"]._serialized_end = 5046
+    _globals["_FETCH_METHOD_ARGS"]._serialized_start = 4919
+    _globals["_FETCH_METHOD_ARGS"]._serialized_end = 5046
+    _globals["_UNKNOWN"]._serialized_start = 5048
+    _globals["_UNKNOWN"]._serialized_end = 5057
+    _globals["_RELATIONCOMMON"]._serialized_start = 5060
+    _globals["_RELATIONCOMMON"]._serialized_end = 5202
+    _globals["_SQL"]._serialized_start = 5205
+    _globals["_SQL"]._serialized_end = 5683
+    _globals["_SQL_ARGSENTRY"]._serialized_start = 5499
+    _globals["_SQL_ARGSENTRY"]._serialized_end = 5589
+    _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_start = 5591
+    _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_end = 5683
+    _globals["_WITHRELATIONS"]._serialized_start = 5685
+    _globals["_WITHRELATIONS"]._serialized_end = 5802
+    _globals["_READ"]._serialized_start = 5805
+    _globals["_READ"]._serialized_end = 6522
+    _globals["_READ_NAMEDTABLE"]._serialized_start = 5983
+    _globals["_READ_NAMEDTABLE"]._serialized_end = 6175
+    _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_start = 6117
+    _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_end = 6175
+    _globals["_READ_DATASOURCE"]._serialized_start = 6178
+    _globals["_READ_DATASOURCE"]._serialized_end = 6509
+    _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_start = 6117
+    _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_end = 6175
+    _globals["_RELATIONCHANGES"]._serialized_start = 6525
+    _globals["_RELATIONCHANGES"]._serialized_end = 6757
+    _globals["_RELATIONCHANGES_OPTIONSENTRY"]._serialized_start = 6117
+    _globals["_RELATIONCHANGES_OPTIONSENTRY"]._serialized_end = 6175
+    _globals["_PROJECT"]._serialized_start = 6759
+    _globals["_PROJECT"]._serialized_end = 6876
+    _globals["_FILTER"]._serialized_start = 6878
+    _globals["_FILTER"]._serialized_end = 6990
+    _globals["_JOIN"]._serialized_start = 6993
+    _globals["_JOIN"]._serialized_end = 7654
+    _globals["_JOIN_JOINDATATYPE"]._serialized_start = 7332
+    _globals["_JOIN_JOINDATATYPE"]._serialized_end = 7424
+    _globals["_JOIN_JOINTYPE"]._serialized_start = 7427
+    _globals["_JOIN_JOINTYPE"]._serialized_end = 7635
+    _globals["_SETOPERATION"]._serialized_start = 7657
+    _globals["_SETOPERATION"]._serialized_end = 8136
+    _globals["_SETOPERATION_SETOPTYPE"]._serialized_start = 7973
+    _globals["_SETOPERATION_SETOPTYPE"]._serialized_end = 8087
+    _globals["_LIMIT"]._serialized_start = 8138
+    _globals["_LIMIT"]._serialized_end = 8214
+    _globals["_OFFSET"]._serialized_start = 8216
+    _globals["_OFFSET"]._serialized_end = 8295
+    _globals["_TAIL"]._serialized_start = 8297
+    _globals["_TAIL"]._serialized_end = 8372
+    _globals["_AGGREGATE"]._serialized_start = 8375
+    _globals["_AGGREGATE"]._serialized_end = 9141
+    _globals["_AGGREGATE_PIVOT"]._serialized_start = 8790
+    _globals["_AGGREGATE_PIVOT"]._serialized_end = 8901
+    _globals["_AGGREGATE_GROUPINGSETS"]._serialized_start = 8903
+    _globals["_AGGREGATE_GROUPINGSETS"]._serialized_end = 8979
+    _globals["_AGGREGATE_GROUPTYPE"]._serialized_start = 8982
+    _globals["_AGGREGATE_GROUPTYPE"]._serialized_end = 9141
+    _globals["_SORT"]._serialized_start = 9144
+    _globals["_SORT"]._serialized_end = 9304
+    _globals["_DROP"]._serialized_start = 9307
+    _globals["_DROP"]._serialized_end = 9448
+    _globals["_DEDUPLICATE"]._serialized_start = 9451
+    _globals["_DEDUPLICATE"]._serialized_end = 9691
+    _globals["_LOCALRELATION"]._serialized_start = 9693
+    _globals["_LOCALRELATION"]._serialized_end = 9782
+    _globals["_CACHEDLOCALRELATION"]._serialized_start = 9784
+    _globals["_CACHEDLOCALRELATION"]._serialized_end = 9856
+    _globals["_CHUNKEDCACHEDLOCALRELATION"]._serialized_start = 9858
+    _globals["_CHUNKEDCACHEDLOCALRELATION"]._serialized_end = 9970
+    _globals["_CACHEDREMOTERELATION"]._serialized_start = 9972
+    _globals["_CACHEDREMOTERELATION"]._serialized_end = 10027
+    _globals["_SAMPLE"]._serialized_start = 10030
+    _globals["_SAMPLE"]._serialized_end = 10303
+    _globals["_RANGE"]._serialized_start = 10306
+    _globals["_RANGE"]._serialized_end = 10451
+    _globals["_SUBQUERYALIAS"]._serialized_start = 10453
+    _globals["_SUBQUERYALIAS"]._serialized_end = 10567
+    _globals["_REPARTITION"]._serialized_start = 10570
+    _globals["_REPARTITION"]._serialized_end = 10712
+    _globals["_SHOWSTRING"]._serialized_start = 10715
+    _globals["_SHOWSTRING"]._serialized_end = 10857
+    _globals["_HTMLSTRING"]._serialized_start = 10859
+    _globals["_HTMLSTRING"]._serialized_end = 10973
+    _globals["_STATSUMMARY"]._serialized_start = 10975
+    _globals["_STATSUMMARY"]._serialized_end = 11067
+    _globals["_STATDESCRIBE"]._serialized_start = 11069
+    _globals["_STATDESCRIBE"]._serialized_end = 11150
+    _globals["_STATCROSSTAB"]._serialized_start = 11152
+    _globals["_STATCROSSTAB"]._serialized_end = 11253
+    _globals["_STATCOV"]._serialized_start = 11255
+    _globals["_STATCOV"]._serialized_end = 11351
+    _globals["_STATCORR"]._serialized_start = 11354
+    _globals["_STATCORR"]._serialized_end = 11491
+    _globals["_STATAPPROXQUANTILE"]._serialized_start = 11494
+    _globals["_STATAPPROXQUANTILE"]._serialized_end = 11658
+    _globals["_STATFREQITEMS"]._serialized_start = 11660
+    _globals["_STATFREQITEMS"]._serialized_end = 11785
+    _globals["_STATSAMPLEBY"]._serialized_start = 11788
+    _globals["_STATSAMPLEBY"]._serialized_end = 12097
+    _globals["_STATSAMPLEBY_FRACTION"]._serialized_start = 11989
+    _globals["_STATSAMPLEBY_FRACTION"]._serialized_end = 12088
+    _globals["_NAFILL"]._serialized_start = 12100
+    _globals["_NAFILL"]._serialized_end = 12234
+    _globals["_NADROP"]._serialized_start = 12237
+    _globals["_NADROP"]._serialized_end = 12371
+    _globals["_NAREPLACE"]._serialized_start = 12374
+    _globals["_NAREPLACE"]._serialized_end = 12670
+    _globals["_NAREPLACE_REPLACEMENT"]._serialized_start = 12529
+    _globals["_NAREPLACE_REPLACEMENT"]._serialized_end = 12670
+    _globals["_TODF"]._serialized_start = 12672
+    _globals["_TODF"]._serialized_end = 12760
+    _globals["_WITHCOLUMNSRENAMED"]._serialized_start = 12763
+    _globals["_WITHCOLUMNSRENAMED"]._serialized_end = 13145
+    _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_start = 13007
+    _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_end = 13074
+    _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_start = 13076
+    _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_end = 13145
+    _globals["_WITHCOLUMNS"]._serialized_start = 13147
+    _globals["_WITHCOLUMNS"]._serialized_end = 13266
+    _globals["_WITHWATERMARK"]._serialized_start = 13269
+    _globals["_WITHWATERMARK"]._serialized_end = 13403
+    _globals["_HINT"]._serialized_start = 13406
+    _globals["_HINT"]._serialized_end = 13538
+    _globals["_UNPIVOT"]._serialized_start = 13541
+    _globals["_UNPIVOT"]._serialized_end = 13868
+    _globals["_UNPIVOT_VALUES"]._serialized_start = 13798
+    _globals["_UNPIVOT_VALUES"]._serialized_end = 13857
+    _globals["_TRANSPOSE"]._serialized_start = 13870
+    _globals["_TRANSPOSE"]._serialized_end = 13992
+    _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_start = 13994
+    _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_end = 14119
+    _globals["_TOSCHEMA"]._serialized_start = 14121
+    _globals["_TOSCHEMA"]._serialized_end = 14227
+    _globals["_REPARTITIONBYEXPRESSION"]._serialized_start = 14230
+    _globals["_REPARTITIONBYEXPRESSION"]._serialized_end = 14433
+    _globals["_MAPPARTITIONS"]._serialized_start = 14436
+    _globals["_MAPPARTITIONS"]._serialized_end = 14668
+    _globals["_GROUPMAP"]._serialized_start = 14671
+    _globals["_GROUPMAP"]._serialized_end = 15521
+    _globals["_TRANSFORMWITHSTATEINFO"]._serialized_start = 15524
+    _globals["_TRANSFORMWITHSTATEINFO"]._serialized_end = 15747
+    _globals["_COGROUPMAP"]._serialized_start = 15750
+    _globals["_COGROUPMAP"]._serialized_end = 16276
+    _globals["_APPLYINPANDASWITHSTATE"]._serialized_start = 16279
+    _globals["_APPLYINPANDASWITHSTATE"]._serialized_end = 16636
+    _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_start = 16639
+    _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_end = 16883
+    _globals["_PYTHONUDTF"]._serialized_start = 16886
+    _globals["_PYTHONUDTF"]._serialized_end = 17063
+    _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_start = 17066
+    _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_end = 17217
+    _globals["_PYTHONDATASOURCE"]._serialized_start = 17219
+    _globals["_PYTHONDATASOURCE"]._serialized_end = 17294
+    _globals["_COLLECTMETRICS"]._serialized_start = 17297
+    _globals["_COLLECTMETRICS"]._serialized_end = 17433
+    _globals["_PARSE"]._serialized_start = 17436
+    _globals["_PARSE"]._serialized_end = 17846
+    _globals["_PARSE_OPTIONSENTRY"]._serialized_start = 6117
+    _globals["_PARSE_OPTIONSENTRY"]._serialized_end = 6175
+    _globals["_PARSE_PARSEFORMAT"]._serialized_start = 17725
+    _globals["_PARSE_PARSEFORMAT"]._serialized_end = 17835
+    _globals["_ASOFJOIN"]._serialized_start = 17849
+    _globals["_ASOFJOIN"]._serialized_end = 18324
+    _globals["_LATERALJOIN"]._serialized_start = 18327
+    _globals["_LATERALJOIN"]._serialized_end = 18557
+    _globals["_NEARESTBYJOIN"]._serialized_start = 18560
+    _globals["_NEARESTBYJOIN"]._serialized_end = 18853
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index 7b3968545ce0d..c99de778db4cd 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -111,6 +111,7 @@ class Relation(google.protobuf.message.Message):
     LATERAL_JOIN_FIELD_NUMBER: builtins.int
     CHUNKED_CACHED_LOCAL_RELATION_FIELD_NUMBER: builtins.int
     RELATION_CHANGES_FIELD_NUMBER: builtins.int
+    NEAREST_BY_JOIN_FIELD_NUMBER: builtins.int
     FILL_NA_FIELD_NUMBER: builtins.int
     DROP_NA_FIELD_NUMBER: builtins.int
     REPLACE_FIELD_NUMBER: builtins.int
@@ -223,6 +224,8 @@ class Relation(google.protobuf.message.Message):
     @property
     def relation_changes(self) -> global___RelationChanges: ...
     @property
+    def nearest_by_join(self) -> global___NearestByJoin: ...
+    @property
     def fill_na(self) -> global___NAFill:
         """NA functions"""
     @property
@@ -310,6 +313,7 @@ class Relation(google.protobuf.message.Message):
         lateral_join: global___LateralJoin | None = ...,
         chunked_cached_local_relation: global___ChunkedCachedLocalRelation | None = ...,
         relation_changes: global___RelationChanges | None = ...,
+        nearest_by_join: global___NearestByJoin | None = ...,
         fill_na: global___NAFill | None = ...,
         drop_na: global___NADrop | None = ...,
         replace: global___NAReplace | None = ...,
@@ -395,6 +399,8 @@ class Relation(google.protobuf.message.Message):
             b"map_partitions",
             "ml_relation",
             b"ml_relation",
+            "nearest_by_join",
+            b"nearest_by_join",
             "offset",
             b"offset",
             "parse",
@@ -524,6 +530,8 @@ class Relation(google.protobuf.message.Message):
             b"map_partitions",
             "ml_relation",
             b"ml_relation",
+            "nearest_by_join",
+            b"nearest_by_join",
             "offset",
             b"offset",
             "parse",
@@ -633,6 +641,7 @@ class Relation(google.protobuf.message.Message):
             "lateral_join",
             "chunked_cached_local_relation",
             "relation_changes",
+            "nearest_by_join",
             "fill_na",
             "drop_na",
             "replace",
@@ -4657,3 +4666,79 @@ class LateralJoin(google.protobuf.message.Message):
     ) -> None: ...
 
 global___LateralJoin = LateralJoin
+
+class NearestByJoin(google.protobuf.message.Message):
+    """Relation of type [[NearestByJoin]].
+
+    For each row on the left side, returns up to `num_results` rows from the right side ranked
+    by `ranking_expression`.
+    """
+
+    DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+    LEFT_FIELD_NUMBER: builtins.int
+    RIGHT_FIELD_NUMBER: builtins.int
+    RANKING_EXPRESSION_FIELD_NUMBER: builtins.int
+    NUM_RESULTS_FIELD_NUMBER: builtins.int
+    JOIN_TYPE_FIELD_NUMBER: builtins.int
+    MODE_FIELD_NUMBER: builtins.int
+    DIRECTION_FIELD_NUMBER: builtins.int
+    @property
+    def left(self) -> global___Relation:
+        """(Required) Left (query) input relation."""
+    @property
+    def right(self) -> global___Relation:
+        """(Required) Right (base) input relation."""
+    @property
+    def ranking_expression(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression:
+        """(Required) Scalar expression used to rank candidate rows on the right side."""
+    num_results: builtins.int
+    """(Required) Maximum number of matches per left row. Must be between 1 and 100000."""
+    join_type: builtins.str
+    """The following three fields use `string` (not typed enums) for parity with `AsOfJoin`,
+    which models analogous fields the same way. Validation happens server-side at planning time.
+
+    (Required) The join type. Must be one of: "inner", "leftouter".
+    """
+    mode: builtins.str
+    """(Required) Search algorithm contract. Must be one of: "approx", "exact"."""
+    direction: builtins.str
+    """(Required) Ranking direction. Must be one of: "distance", "similarity"."""
+    def __init__(
+        self,
+        *,
+        left: global___Relation | None = ...,
+        right: global___Relation | None = ...,
+        ranking_expression: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ...,
+        num_results: builtins.int = ...,
+        join_type: builtins.str = ...,
+        mode: builtins.str = ...,
+        direction: builtins.str = ...,
+    ) -> None: ...
+    def HasField(
+        self,
+        field_name: typing_extensions.Literal[
+            "left", b"left", "ranking_expression", b"ranking_expression", "right", b"right"
+        ],
+    ) -> builtins.bool: ...
+    def ClearField(
+        self,
+        field_name: typing_extensions.Literal[
+            "direction",
+            b"direction",
+            "join_type",
+            b"join_type",
+            "left",
+            b"left",
+            "mode",
+            b"mode",
+            "num_results",
+            b"num_results",
+            "ranking_expression",
+            b"ranking_expression",
+            "right",
+            b"right",
+        ],
+    ) -> None: ...
+
+global___NearestByJoin = NearestByJoin
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b2586a2d7a18c..734b8cad62110 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -2865,6 +2865,73 @@ def lateralJoin(
         """
         ...
 
+    def nearestByJoin(
+        self,
+        other: "DataFrame",
+        rankingExpression: Column,
+        numResults: int,
+        mode: str,
+        direction: str,
+        *,
+        joinType: str = "inner",
+    ) -> "DataFrame":
+        """
+        Nearest-by top-K ranking join with another :class:`DataFrame`. For each row on the
+        left (query side), returns up to ``numResults`` rows from ``other`` (base side), ranked
+        by ``rankingExpression``.
+
+        The current implementation evaluates the full cross-product of left and right and
+        bounds memory per left row by ``numResults``. Index-backed approximate strategies
+        (transparent to ``approx`` mode) are planned for a future release; until then,
+        pre-filter ``other`` when it is large. Tie-breaking among rows with equal ranking
+        values is unspecified.
+
+        .. versionadded:: 4.2.0
+
+        Parameters
+        ----------
+        other : :class:`DataFrame`
+            Right (base side) of the join - the candidate pool searched for each row of this
+            DataFrame.
+        rankingExpression : :class:`Column`
+            Scalar expression used to rank candidate rows on the right side.
+        numResults : int
+            Maximum number of matches per query row. Must be between 1 and 100000.
+        mode : str
+            Search algorithm contract. Must be one of: ``approx``, ``exact``. ``approx`` allows
+            the optimizer to use indexed or other approximate strategies when available;
+            ``exact`` forces brute-force evaluation and requires the ranking expression to be
+            deterministic.
+        direction : str
+            ``"distance"`` (smallest value first) or ``"similarity"`` (largest value first).
+        joinType : str, keyword-only, optional
+            Default ``inner``. Must be one of: ``inner``, ``leftouter``.
+
+        Returns
+        -------
+        :class:`DataFrame`
+            Joined DataFrame.
+
+        Examples
+        --------
+        >>> from pyspark.sql import functions as sf
+        >>> users = spark.createDataFrame(
+        ...     [(1, 10.0), (2, 20.0), (3, 30.0)], ["user_id", "score"])
+        >>> products = spark.createDataFrame(
+        ...     [("A", 11.0), ("B", 22.0), ("C", 5.0)], ["product", "pscore"])
+        >>> users.nearestByJoin(
+        ...     products, -sf.abs(users.score - products.pscore), 1, "exact", "similarity"
+        ... ).select("user_id", "product").orderBy("user_id").show()
+        +-------+-------+
+        |user_id|product|
+        +-------+-------+
+        |      1|      A|
+        |      2|      B|
+        |      3|      B|
+        +-------+-------+
+        """
+        ...
+
     # TODO(SPARK-22947): Fix the DataFrame API.
     @dispatch_df_method
     def _joinAsOf(
diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py
index 7bdbc0cbbd09f..27db280be86d1 100644
--- a/python/pyspark/sql/functions/__init__.py
+++ b/python/pyspark/sql/functions/__init__.py
@@ -248,6 +248,7 @@
     "timestamp_micros",
     "timestamp_millis",
     "timestamp_seconds",
+    "time_bucket",
     "time_diff",
     "time_from_micros",
     "time_from_millis",
@@ -469,6 +470,7 @@
     "schema_of_json",
     "to_json",
     # VARIANT Functions
+    "is_valid_variant",
     "is_variant_null",
     "parse_json",
     "schema_of_variant",
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 67812846cf057..185924ddc735d 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -4451,7 +4451,7 @@ def var_samp(col: "ColumnOrName") -> Column:
     --------
     :meth:`pyspark.sql.functions.variance`
     :meth:`pyspark.sql.functions.var_pop`
-    :meth:`pyspark.sql.functions.std_samp`
+    :meth:`pyspark.sql.functions.stddev_samp`
 
     Examples
     --------
@@ -4491,7 +4491,7 @@ def var_pop(col: "ColumnOrName") -> Column:
     --------
     :meth:`pyspark.sql.functions.variance`
     :meth:`pyspark.sql.functions.var_samp`
-    :meth:`pyspark.sql.functions.std_pop`
+    :meth:`pyspark.sql.functions.stddev_pop`
 
     Examples
     --------
@@ -13125,6 +13125,67 @@ def timestamp_add(unit: str, quantity: "ColumnOrName", ts: "ColumnOrName") -> Co
     )
 
 
+@_try_remote_functions
+def time_bucket(
+    bucket_size: "Column",
+    ts: "ColumnOrName",
+    origin: Optional["Column"] = None,
+) -> Column:
+    """
+    Aligns a timestamp to the start of a fixed-size interval bucket.
+
+    Returns the start of the bucket that ``ts`` falls into, where buckets are defined by
+    the given ``bucket_size`` interval aligned to optional ``origin``. For ``TIMESTAMP_NTZ``,
+    bucketing is performed in UTC. For ``TIMESTAMP``, year-month interval buckets and
+    calendar-day components of day-time interval buckets align to the session time zone.
+
+    .. versionadded:: 4.2.0
+
+    Parameters
+    ----------
+    bucket_size : :class:`~pyspark.sql.Column`
+        A day-time or year-month interval defining the bucket size. Must be positive
+        and foldable.
+    ts : :class:`~pyspark.sql.Column` or column name
+        A TIMESTAMP or TIMESTAMP_NTZ value to bucket.
+    origin : :class:`~pyspark.sql.Column`, optional
+        Alignment anchor. Defaults to 1970-01-01 00:00:00. Must be the same type as
+        ``ts`` and must be foldable.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        The start of the bucket containing ``ts``, as the same type as ``ts``.
+
+    Examples
+    --------
+    >>> spark.conf.set("spark.sql.session.timeZone", "UTC")
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame(
+    ...     [(datetime.datetime(2024, 1, 1, 11, 27, 0),)], ['ts'])
+    >>> df.select(
+    ...     sf.time_bucket(sf.expr("INTERVAL '15' MINUTE"), 'ts').alias("bucket")
+    ... ).collect()
+    [Row(bucket=datetime.datetime(2024, 1, 1, 11, 15))]
+
+    Shift the grid with an explicit origin: buckets run at :05, :20, :35, :50:
+
+    >>> df.select(
+    ...     sf.time_bucket(
+    ...         sf.expr("INTERVAL '15' MINUTE"),
+    ...         'ts',
+    ...         sf.expr("TIMESTAMP '1970-01-01 00:05:00'")
+    ...     ).alias("bucket")
+    ... ).collect()
+    [Row(bucket=datetime.datetime(2024, 1, 1, 11, 20))]
+    >>> spark.conf.unset("spark.sql.session.timeZone")
+    """
+    if origin is None:
+        return _invoke_function_over_columns("time_bucket", bucket_size, ts)
+    return _invoke_function_over_columns("time_bucket", bucket_size, ts, origin)
+
+
 @_try_remote_functions
 def window(
     timeColumn: "ColumnOrName",
@@ -21163,6 +21224,35 @@ def is_variant_null(v: "ColumnOrName") -> Column:
     return _invoke_function("is_variant_null", _to_java_column(v))
 
 
+@_try_remote_functions
+def is_valid_variant(v: "ColumnOrName") -> Column:
+    """
+    Check if a variant value is valid. Returns true if the variant is valid, false if it is
+    malformed, and NULL if the input is NULL.
+
+    .. versionadded:: 4.2.0
+
+    Parameters
+    ----------
+    v : :class:`~pyspark.sql.Column` or str
+        a variant column or column name
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        a boolean column indicating whether the variant value is valid
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''} ])
+    >>> df.select(is_valid_variant(parse_json(df.json)).alias("r")).collect()
+    [Row(r=True)]
+    """
+    from pyspark.sql.classic.column import _to_java_column
+
+    return _invoke_function("is_valid_variant", _to_java_column(v))
+
+
 @_try_remote_functions
 def variant_get(v: "ColumnOrName", path: Union[Column, str], targetType: str) -> Column:
     """
@@ -26289,15 +26379,21 @@ def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column:
 
 
 @_try_remote_functions
-def st_asbinary(geo: "ColumnOrName") -> Column:
+def st_asbinary(geo: "ColumnOrName", endianness: Optional["ColumnOrName"] = None) -> Column:
     """Returns the input GEOGRAPHY or GEOMETRY value in WKB format.
 
     .. versionadded:: 4.1.0
 
+    .. versionchanged:: 4.2.0
+        Added the optional `endianness` parameter.
+
     Parameters
     ----------
     geo : :class:`~pyspark.sql.Column` or str
         A geospatial value, either a GEOGRAPHY or a GEOMETRY.
+    endianness : :class:`~pyspark.sql.Column` or str, optional
+        The optional endianness of the output WKB, 'NDR' for little-endian (default) or 'XDR' for
+        big-endian.
 
     Examples
     --------
@@ -26306,15 +26402,31 @@ def st_asbinary(geo: "ColumnOrName") -> Column:
     >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb'])  # noqa
     >>> df.select(sf.hex(sf.st_asbinary(sf.st_geogfromwkb('wkb')))).collect()
-    [Row(hex(st_asbinary(st_geogfromwkb(wkb)))='0101000000000000000000F03F0000000000000040')]
+    [Row(hex(st_asbinary(st_geogfromwkb(wkb), NDR))='0101000000000000000000F03F0000000000000040')]
 
     Example 2: Getting WKB from GEOMETRY.
     >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb'])  # noqa
     >>> df.select(sf.hex(sf.st_asbinary(sf.st_geomfromwkb('wkb')))).collect()
-    [Row(hex(st_asbinary(st_geomfromwkb(wkb, 0)))='0101000000000000000000F03F0000000000000040')]
+    [Row(hex(st_asbinary(st_geomfromwkb(wkb, 0), NDR))='0101000000000000000000F03F0000000000000040')]
+
+    Example 3: Getting WKB (little-endian) from GEOGRAPHY.
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb'])  # noqa
+    >>> df.select(sf.hex(sf.st_asbinary(sf.st_geogfromwkb('wkb'), 'NDR'))).collect()
+    [Row(hex(st_asbinary(st_geogfromwkb(wkb), NDR))='0101000000000000000000F03F0000000000000040')]
+
+    Example 4: Getting WKB (big-endian) from GEOMETRY.
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb'])  # noqa
+    >>> df.select(sf.hex(sf.st_asbinary(sf.st_geomfromwkb('wkb'), 'XDR'))).collect()
+    [Row(hex(st_asbinary(st_geomfromwkb(wkb, 0), XDR))='00000000013FF00000000000004000000000000000')]
     """
-    return _invoke_function_over_columns("st_asbinary", geo)
+    if endianness is None:
+        return _invoke_function_over_columns("st_asbinary", geo)
+    else:
+        _endianness = lit(endianness) if isinstance(endianness, str) else endianness
+        return _invoke_function_over_columns("st_asbinary", geo, _endianness)
 
 
 @_try_remote_functions
@@ -26333,7 +26445,7 @@ def st_geogfromwkb(wkb: "ColumnOrName") -> Column:
     >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb'])  # noqa
     >>> df.select(sf.hex(sf.st_asbinary(sf.st_geogfromwkb('wkb')))).collect()
-    [Row(hex(st_asbinary(st_geogfromwkb(wkb)))='0101000000000000000000F03F0000000000000040')]
+    [Row(hex(st_asbinary(st_geogfromwkb(wkb), NDR))='0101000000000000000000F03F0000000000000040')]
     """
     return _invoke_function_over_columns("st_geogfromwkb", wkb)
 
@@ -26358,7 +26470,7 @@ def st_geomfromwkb(
     >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(bytes.fromhex('0101000000000000000000F03F0000000000000040'),)], ['wkb'])  # noqa
     >>> df.select(sf.hex(sf.st_asbinary(sf.st_geomfromwkb('wkb')))).collect()
-    [Row(hex(st_asbinary(st_geomfromwkb(wkb, 0)))='0101000000000000000000F03F0000000000000040')]
+    [Row(hex(st_asbinary(st_geomfromwkb(wkb, 0), NDR))='0101000000000000000000F03F0000000000000040')]
     """
     if srid is None:
         return _invoke_function_over_columns("st_geomfromwkb", wkb)
@@ -27415,7 +27527,7 @@ def kll_merge_agg_bigint(
     sketch (range 8-65535). If k is not specified, the merged sketch adopts the k value
     from the first input sketch.
 
-    .. versionadded:: 4.1.0
+    .. versionadded:: 4.1.2
 
     Parameters
     ----------
@@ -27459,7 +27571,7 @@ def kll_merge_agg_float(
     sketch (range 8-65535). If k is not specified, the merged sketch adopts the k value
     from the first input sketch.
 
-    .. versionadded:: 4.1.0
+    .. versionadded:: 4.1.2
 
     Parameters
     ----------
@@ -27503,7 +27615,7 @@ def kll_merge_agg_double(
     sketch (range 8-65535). If k is not specified, the merged sketch adopts the k value
     from the first input sketch.
 
-    .. versionadded:: 4.1.0
+    .. versionadded:: 4.1.2
 
     Parameters
     ----------
diff --git a/python/pyspark/sql/profiler.py b/python/pyspark/sql/profiler.py
index 6bbfdcc1db51c..60f68c61708fd 100644
--- a/python/pyspark/sql/profiler.py
+++ b/python/pyspark/sql/profiler.py
@@ -27,10 +27,10 @@
     Dict,
     Iterable,
     Literal,
-    NamedTuple,
     Optional,
     Tuple,
     Union,
+    TYPE_CHECKING,
     overload,
 )
 import warnings
@@ -50,43 +50,79 @@
     PStatsParam,
 )
 
+if TYPE_CHECKING:
+    from pyspark.sql._typing import ProfileResults, ProfileResultsV2
 
-class ProfileResult(NamedTuple):
-    perf: Optional[pstats.Stats] = None
-    memory: Optional[CodeMapDict] = None
 
-    def __bool__(self) -> bool:
-        return self.perf is not None or self.memory is not None
+class _ProfileResultsParam(AccumulatorParam[Optional["ProfileResults"]]):
+    """
+    AccumulatorParam for profilers.
+    """
 
-    def replace(self, **kwargs: Any) -> "ProfileResult":
-        return self._replace(**kwargs)
+    @staticmethod
+    def zero(value: Optional["ProfileResults"]) -> Optional["ProfileResults"]:
+        return value
 
+    @staticmethod
+    def addInPlace(
+        value1: Optional["ProfileResults"], value2: Optional["ProfileResults"]
+    ) -> Optional["ProfileResults"]:
+        if value1 is None or len(value1) == 0:
+            value1 = {}
+        if value2 is None or len(value2) == 0:
+            value2 = {}
+
+        value = value1.copy()
+        for key, (perf, mem, *_) in value2.items():
+            if key in value1:
+                orig_perf, orig_mem, *_ = value1[key]
+            else:
+                orig_perf, orig_mem = (PStatsParam.zero(None), MemUsageParam.zero(None))
+            value[key] = (
+                PStatsParam.addInPlace(orig_perf, perf),
+                MemUsageParam.addInPlace(orig_mem, mem),
+            )
+        return value
 
-ProfileResults = Dict[Union[int, str], ProfileResult]
+
+ProfileResultsParam = _ProfileResultsParam()
 
 
-class _ProfileResultsParam(AccumulatorParam["ProfileResults"]):
+# _ProfileResultsParam uses (perf, memory) tuple which is very difficult
+# to extend. However, this is shared code between the server and the client
+# for spark connect. In order to gradually migrate to dict implementation,
+# we create a new AccumulatorParam on a new channel SQL_UDF_PROFIER_V2.
+# We started this in 4.2.0. When we drop support for all versions before 4.2.0,
+# we can remove _ProfileResultsParam and other old content.
+class _ProfileResultsParamV2(AccumulatorParam["ProfileResultsV2"]):
     """
     AccumulatorParam for profilers.
     """
 
     @staticmethod
-    def zero(value: "ProfileResults") -> "ProfileResults":
+    def zero(value: "ProfileResultsV2") -> "ProfileResultsV2":
         return {}
 
     @staticmethod
-    def addInPlace(value1: "ProfileResults", value2: "ProfileResults") -> "ProfileResults":
+    def addInPlace(value1: "ProfileResultsV2", value2: "ProfileResultsV2") -> "ProfileResultsV2":
         for key, result in value2.items():
             if key not in value1:
                 value1[key] = result
             else:
-                perf = PStatsParam.addInPlace(value1[key].perf, result.perf)
-                memory = MemUsageParam.addInPlace(value1[key].memory, result.memory)
-                value1[key] = ProfileResult(perf=perf, memory=memory)
+                perf = PStatsParam.addInPlace(
+                    value1[key].get("perf", None), result.get("perf", None)
+                )
+                if perf is not None:
+                    value1[key]["perf"] = perf
+                memory = MemUsageParam.addInPlace(
+                    value1[key].get("memory", None), result.get("memory", None)
+                )
+                if memory is not None:
+                    value1[key]["memory"] = memory
         return value1
 
 
-ProfileResultsParam = _ProfileResultsParam()
+ProfileResultsParamV2 = _ProfileResultsParamV2()
 
 
 class WorkerPerfProfiler:
@@ -95,9 +131,13 @@ class WorkerPerfProfiler:
     """
 
     def __init__(
-        self, accumulator: Accumulator["ProfileResults"], result_key: Union[int, str]
+        self,
+        accumulator: Accumulator[Optional["ProfileResults"]],
+        accumulator_v2: Accumulator["ProfileResultsV2"],
+        result_key: Union[int, str],
     ) -> None:
         self._accumulator = accumulator
+        self._accumulator_v2 = accumulator_v2
         self._profiler = cProfile.Profile()
         self._result_key = result_key
 
@@ -112,7 +152,13 @@ def save(self) -> None:
         # make it picklable
         st.stream = None  # type: ignore[attr-defined]
         st.strip_dirs()
-        self._accumulator.add({self._result_key: ProfileResult(perf=st)})
+        self._accumulator.add({self._result_key: (st, None)})
+
+        st = pstats.Stats(self._profiler, stream=None)
+        # make it picklable
+        st.stream = None  # type: ignore[attr-defined]
+        st.strip_dirs()
+        self._accumulator_v2.add({self._result_key: {"perf": st}})
 
     def __enter__(self) -> "WorkerPerfProfiler":
         self.start()
@@ -135,13 +181,15 @@ class WorkerMemoryProfiler:
 
     def __init__(
         self,
-        accumulator: Accumulator["ProfileResults"],
+        accumulator: Accumulator[Optional["ProfileResults"]],
+        accumulator_v2: Accumulator["ProfileResultsV2"],
         result_key: Union[int, str],
         func_or_code: Union[Callable, CodeType],
     ) -> None:
         from pyspark.memory_profiler_ext import UDFLineProfilerV2
 
         self._accumulator = accumulator
+        self._accumulator_v2 = accumulator_v2
         self._profiler = UDFLineProfilerV2()
         if isinstance(func_or_code, CodeType):
             self._profiler.add_code(func_or_code)
@@ -160,7 +208,8 @@ def save(self) -> None:
             filename: list(line_iterator)
             for filename, line_iterator in self._profiler.code_map.items()
         }
-        self._accumulator.add({self._result_key: ProfileResult(memory=codemap_dict)})
+        self._accumulator.add({self._result_key: (None, codemap_dict)})
+        self._accumulator_v2.add({self._result_key: {"memory": codemap_dict}})
 
     def __enter__(self) -> "WorkerMemoryProfiler":
         self.start()
@@ -227,9 +276,9 @@ def show(id: Union[int, str]) -> None:
     def _perf_profile_results(self) -> Dict[Union[int, str], pstats.Stats]:
         with self._lock:
             return {
-                result_id: result.perf
+                result_id: result["perf"]
                 for result_id, result in self._profile_results.items()
-                if result.perf is not None
+                if result.get("perf", None) is not None
             }
 
     def show_memory_profiles(self, id: Optional[Union[int, str]] = None) -> None:
@@ -273,14 +322,14 @@ def show(id: Union[int, str]) -> None:
     def _memory_profile_results(self) -> Dict[Union[int, str], CodeMapDict]:
         with self._lock:
             return {
-                result_id: result.memory
+                result_id: result["memory"]
                 for result_id, result in self._profile_results.items()
-                if result.memory is not None
+                if result.get("memory", None) is not None
             }
 
     @property
     @abstractmethod
-    def _profile_results(self) -> "ProfileResults":
+    def _profile_results(self) -> "ProfileResultsV2":
         """
         Get the profile results.
         """
@@ -369,12 +418,12 @@ def clear_perf_profiles(self, id: Optional[Union[int, str]] = None) -> None:
         with self._lock:
             if id is not None:
                 if id in self._profile_results:
-                    self._profile_results[id] = self._profile_results[id].replace(perf=None)
+                    self._profile_results[id].pop("perf", None)
                     if not self._profile_results[id]:
                         self._profile_results.pop(id)
             else:
                 for id in list(self._profile_results.keys()):
-                    self._profile_results[id] = self._profile_results[id].replace(perf=None)
+                    self._profile_results[id].pop("perf", None)
                     if not self._profile_results[id]:
                         self._profile_results.pop(id)
 
@@ -393,12 +442,12 @@ def clear_memory_profiles(self, id: Optional[Union[int, str]] = None) -> None:
         with self._lock:
             if id is not None:
                 if id in self._profile_results:
-                    self._profile_results[id] = self._profile_results[id].replace(memory=None)
+                    self._profile_results[id].pop("memory", None)
                     if not self._profile_results[id]:
                         self._profile_results.pop(id)
             else:
                 for id in list(self._profile_results.keys()):
-                    self._profile_results[id] = self._profile_results[id].replace(memory=None)
+                    self._profile_results[id].pop("memory", None)
                     if not self._profile_results[id]:
                         self._profile_results.pop(id)
 
@@ -406,15 +455,16 @@ def clear_memory_profiles(self, id: Optional[Union[int, str]] = None) -> None:
 class AccumulatorProfilerCollector(ProfilerCollector):
     def __init__(self) -> None:
         super().__init__()
-        if SpecialAccumulatorIds.SQL_UDF_PROFIER in _accumulatorRegistry:
-            self._accumulator = _accumulatorRegistry[SpecialAccumulatorIds.SQL_UDF_PROFIER]
+
+        if SpecialAccumulatorIds.SQL_UDF_PROFIER_V2 in _accumulatorRegistry:
+            self._accumulator = _accumulatorRegistry[SpecialAccumulatorIds.SQL_UDF_PROFIER_V2]
         else:
             self._accumulator = Accumulator(
-                SpecialAccumulatorIds.SQL_UDF_PROFIER, {}, ProfileResultsParam
+                SpecialAccumulatorIds.SQL_UDF_PROFIER_V2, {}, ProfileResultsParamV2
             )
 
     @property
-    def _profile_results(self) -> "ProfileResults":
+    def _profile_results(self) -> "ProfileResultsV2":
         with self._lock:
             value = self._accumulator.value
             return value if value is not None else {}
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index f0fb6de4891a0..b6366a35475c3 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -626,12 +626,19 @@ def create(self) -> "SparkSession":
                 from pyspark.core.context import SparkContext
 
                 with self._lock:
-                    # Build SparkConf from options
-                    sparkConf = SparkConf()
-                    for key, value in self._options.items():
-                        sparkConf.set(key, str(value))
-
-                    sc = SparkContext.getOrCreate(sparkConf)
+                    instantiated_session = SparkSession._instantiatedSession
+                    # Get SparkContext
+                    if (
+                        instantiated_session is not None
+                        and instantiated_session._sc._jsc is not None
+                    ):
+                        sc = instantiated_session._sc
+                    else:
+                        sparkConf = SparkConf()
+                        for key, value in self._options.items():
+                            sparkConf.set(key, value)
+                        # This SparkContext may be an existing one.
+                        sc = SparkContext.getOrCreate(sparkConf)
                     jSparkSessionClass = SparkSession._get_j_spark_session_class(sc._jvm)
                     # Create a new SparkSession in the JVM
                     jSparkSession = jSparkSessionClass.builder().config(self._options).create()
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_map.py b/python/pyspark/sql/tests/arrow/test_arrow_map.py
index af26260849259..8dd095a176b9c 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow_map.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow_map.py
@@ -42,7 +42,7 @@
     not have_pandas or not have_pyarrow,
     pandas_requirement_message or pyarrow_requirement_message,
 )
-class MapInArrowTestsMixin(object):
+class MapInArrowTestsMixin:
     def test_map_in_arrow(self):
         def func(iterator):
             for batch in iterator:
diff --git a/python/pyspark/sql/tests/connect/client/test_artifact.py b/python/pyspark/sql/tests/connect/client/test_artifact.py
index 8400e4d1562ad..87ea570204e71 100644
--- a/python/pyspark/sql/tests/connect/client/test_artifact.py
+++ b/python/pyspark/sql/tests/connect/client/test_artifact.py
@@ -180,7 +180,7 @@ def test_add_file(self):
 
 
 @unittest.skipIf(is_remote_only(), "Requires JVM access")
-class ArtifactTests(ReusedConnectTestCase, ArtifactTestsMixin):
+class ArtifactTests(ArtifactTestsMixin, ReusedConnectTestCase):
     @classmethod
     def root(cls):
         from pyspark.core.files import SparkFiles
diff --git a/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py b/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py
index 8e3065104737b..8308a74b9057b 100644
--- a/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py
+++ b/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py
@@ -20,7 +20,7 @@
 from pyspark.testing.connectutils import ReusedConnectTestCase
 
 
-class LocalClusterArtifactTests(ReusedConnectTestCase, ArtifactTestsMixin):
+class LocalClusterArtifactTests(ArtifactTestsMixin, ReusedConnectTestCase):
     @classmethod
     def conf(cls):
         return (
diff --git a/python/pyspark/sql/tests/connect/client/test_client.py b/python/pyspark/sql/tests/connect/client/test_client.py
index 85fbafe227284..9b0f59522e257 100644
--- a/python/pyspark/sql/tests/connect/client/test_client.py
+++ b/python/pyspark/sql/tests/connect/client/test_client.py
@@ -499,7 +499,10 @@ def test_on_exit_does_not_call_when_already_closed(self):
         client._release_session_on_exit = True
         client._closed = True
 
-        call_tracker = {"release_session": 0, "close": 0}
+        call_tracker = {"cleanup_ml_cache": 0, "release_session": 0, "close": 0}
+
+        def mock_cleanup_ml_cache():
+            call_tracker["cleanup_ml_cache"] += 1
 
         def mock_release_session():
             call_tracker["release_session"] += 1
@@ -507,11 +510,13 @@ def mock_release_session():
         def mock_close():
             call_tracker["close"] += 1
 
+        client._cleanup_ml_cache = mock_cleanup_ml_cache
         client.release_session = mock_release_session
         client.close = mock_close
 
         client._on_exit()
 
+        self.assertEqual(call_tracker["cleanup_ml_cache"], 0)
         self.assertEqual(call_tracker["release_session"], 0)
         self.assertEqual(call_tracker["close"], 0)
 
diff --git a/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py b/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py
index b9583c4a11fa9..a8b46c38db4fe 100644
--- a/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py
+++ b/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py
@@ -18,6 +18,7 @@
 import time
 
 import pyspark.cloudpickle
+from pyspark.errors import AnalysisException
 from pyspark.sql.tests.streaming.test_streaming_listener import StreamingListenerTestsMixin
 from pyspark.sql.streaming.listener import StreamingQueryListener
 from pyspark.sql.functions import count, lit
@@ -257,7 +258,13 @@ def test_listener_events_spark_command(self):
 
                 @eventually(timeout=60, catch_assertions=True)
                 def load_event(event_name, table_name):
-                    table = self.spark.read.table(table_name).collect()
+                    try:
+                        table = self.spark.read.table(table_name).collect()
+                    except AnalysisException as e:
+                        # It's possible that the table has not been created yet
+                        if e.getCondition() == "TABLE_OR_VIEW_NOT_FOUND":
+                            return False
+                        raise e
                     if len(table) == 0:
                         return False
                     events[event_name] = pyspark.cloudpickle.loads(table[0][0])
diff --git a/python/pyspark/sql/tests/connect/test_connect_session.py b/python/pyspark/sql/tests/connect/test_connect_session.py
index 210913db7632d..92be5c83c048b 100644
--- a/python/pyspark/sql/tests/connect/test_connect_session.py
+++ b/python/pyspark/sql/tests/connect/test_connect_session.py
@@ -18,6 +18,7 @@
 import os
 import unittest
 import uuid
+from typing import Optional
 
 from pyspark.util import is_remote_only
 from pyspark.errors import PySparkException
@@ -32,7 +33,7 @@
 if should_test_connect:
     import grpc
     from pyspark.sql.connect.session import SparkSession as RemoteSparkSession
-    from pyspark.sql.connect.client import ChannelBuilder
+    from pyspark.sql.connect.client import ChannelBuilder, DefaultChannelBuilder
     from pyspark.errors.exceptions.connect import (
         AnalysisException,
         SparkConnectException,
@@ -40,6 +41,11 @@
         SparkUpgradeException,
     )
 
+    class CustomChannelBuilder(DefaultChannelBuilder):
+        @property
+        def userId(self) -> Optional[str]:
+            return "abc"
+
 
 @unittest.skipIf(is_remote_only(), "Session creation different from local mode")
 class SparkConnectSessionTests(ReusedConnectTestCase):
@@ -299,6 +305,89 @@ def test_authentication(self):
         self.assertTrue("Invalid authentication token" in str(e.exception))
 
 
+@unittest.skipIf(not should_test_connect, connect_requirement_message)
+class SparkConnectSessionBuilderTests(unittest.TestCase):
+    def setUp(self):
+        # Reset class-level session state so tests are order-independent.
+        RemoteSparkSession._default_session = None
+        RemoteSparkSession._active_session.session = None
+
+    def test_fails_to_create_session_without_remote_and_channel_builder(self):
+        with self.assertRaises(ValueError):
+            RemoteSparkSession.builder.getOrCreate()
+
+    def test_fails_to_create_when_both_remote_and_channel_builder_are_specified(self):
+        with self.assertRaises(ValueError):
+            (
+                RemoteSparkSession.builder.channelBuilder(CustomChannelBuilder("sc://localhost"))
+                .remote("sc://localhost")
+                .getOrCreate()
+            )
+
+    def test_creates_session_with_channel_builder(self):
+        test_session = RemoteSparkSession.builder.channelBuilder(
+            CustomChannelBuilder("sc://other")
+        ).getOrCreate()
+        host = test_session.client.host
+        # Skip release_session() since "sc://other" is a fake remote.
+        test_session.release_session_on_close = False
+        test_session.stop()
+
+        self.assertEqual("other", host)
+
+    def test_creates_session_with_remote(self):
+        test_session = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+        host = test_session.client.host
+        test_session.release_session_on_close = False
+        test_session.stop()
+
+        self.assertEqual("other", host)
+
+    def test_session_stop(self):
+        session = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+        session.release_session_on_close = False
+
+        self.assertFalse(session.is_stopped)
+        session.stop()
+        self.assertTrue(session.is_stopped)
+
+    def test_session_create_sets_active_session(self):
+        session = RemoteSparkSession.builder.remote("sc://abc").create()
+        session2 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+
+        self.assertIs(session, session2)
+        session.release_session_on_close = False
+        session.stop()
+
+    def test_active_session_expires_when_client_closes(self):
+        s1 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+        s2 = RemoteSparkSession.getActiveSession()
+
+        self.assertIs(s1, s2)
+
+        # We don't call close() to avoid executing ExecutePlanResponseReattachableIterator
+        s1._client._closed = True
+
+        self.assertIsNone(RemoteSparkSession.getActiveSession())
+        s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+
+        self.assertIsNot(s1, s3)
+
+    def test_default_session_expires_when_client_closes(self):
+        s1 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+        s2 = RemoteSparkSession._get_default_session()
+
+        self.assertIs(s1, s2)
+
+        # We don't call close() to avoid executing ExecutePlanResponseReattachableIterator
+        s1._client._closed = True
+
+        self.assertIsNone(RemoteSparkSession._get_default_session())
+        s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
+
+        self.assertIsNot(s1, s3)
+
+
 @unittest.skipIf(not should_test_connect, connect_requirement_message)
 class SparkConnectSessionWithOptionsTest(unittest.TestCase):
     def setUp(self) -> None:
diff --git a/python/pyspark/sql/tests/connect/test_parity_column.py b/python/pyspark/sql/tests/connect/test_parity_column.py
index 3903bb57a3750..a2b00d7955eee 100644
--- a/python/pyspark/sql/tests/connect/test_parity_column.py
+++ b/python/pyspark/sql/tests/connect/test_parity_column.py
@@ -17,6 +17,8 @@
 
 import unittest
 
+from pyspark.errors import AnalysisException
+from pyspark.sql import functions as sf
 from pyspark.sql.tests.test_column import ColumnTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 
@@ -38,6 +40,16 @@ def tearDownClass(cls):
     def test_validate_column_types(self):
         super().test_validate_column_types()
 
+    def test_resolve_after_union(self):
+        # Connect diverges from Classic here: Union is treated as a leaf when
+        # walking the plan tree for plan-id resolution, so the left-side plan
+        # id is never found and CANNOT_RESOLVE_DATAFRAME_COLUMN is thrown
+        # before any name-based fallback - in both strict and lenient modes.
+        df1 = self.spark.sql("SELECT 1 AS c")
+        df2 = self.spark.sql("SELECT 2 AS c")
+        with self.assertRaisesRegex(AnalysisException, "CANNOT_RESOLVE_DATAFRAME_COLUMN"):
+            df1.union(df2).select(df1.c).collect()
+
     def test_df_col_resolution_mode(self):
         self.assertEqual(
             self.spark.conf.get("spark.sql.analyzer.strictDataFrameColumnResolution"),
@@ -68,6 +80,30 @@ def test_df_col_resolution_mode(self):
             "false",
         )
 
+    # The shadowing trio diverges in lenient mode: where Classic and Connect
+    # strict raise, lenient resolves the tagged reference by name against the
+    # current (shadowed) output.
+
+    def test_resolve_after_chained_withcolumn_shadow(self):
+        df = self.spark.sql("SELECT 1 AS c")
+        rows = (
+            df.withColumn("c", sf.col("c").cast("string"))
+            .withColumn("c", sf.col("c").cast("int"))
+            .select(df.c)
+            .collect()
+        )
+        self.assertEqual([r.c for r in rows], [1])
+
+    def test_resolve_after_select_alias_shadow(self):
+        df = self.spark.sql("SELECT 1 AS c")
+        rows = df.select(df.c.cast("string").alias("c")).select(df.c).collect()
+        self.assertEqual([r.c for r in rows], ["1"])
+
+    def test_resolve_after_agg_alias_shadow(self):
+        df = self.spark.sql("SELECT 1 AS c")
+        rows = df.groupBy().agg(sf.sum("c").alias("c")).select(df.c).collect()
+        self.assertEqual([r.c for r in rows], [1])
+
 
 if __name__ == "__main__":
     from pyspark.testing import main
diff --git a/python/pyspark/sql/tests/connect/test_parity_nearest_by_join.py b/python/pyspark/sql/tests/connect/test_parity_nearest_by_join.py
new file mode 100644
index 0000000000000..1fb0f5b620463
--- /dev/null
+++ b/python/pyspark/sql/tests/connect/test_parity_nearest_by_join.py
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from pyspark.sql.tests.test_nearest_by_join import NearestByJoinTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+
+
+class NearestByJoinParityTests(NearestByJoinTestsMixin, ReusedConnectTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.testing import main
+
+    main()
diff --git a/python/pyspark/sql/tests/connect/test_resources.py b/python/pyspark/sql/tests/connect/test_parity_resources.py
similarity index 93%
rename from python/pyspark/sql/tests/connect/test_resources.py
rename to python/pyspark/sql/tests/connect/test_parity_resources.py
index 2a81e22e9c7dd..0eb7c4b31338c 100644
--- a/python/pyspark/sql/tests/connect/test_resources.py
+++ b/python/pyspark/sql/tests/connect/test_parity_resources.py
@@ -20,7 +20,7 @@
 from pyspark.sql.tests.test_resources import ResourceProfileTestsMixin
 
 
-class ResourceProfileTests(ResourceProfileTestsMixin, ReusedConnectTestCase):
+class ResourceProfileParityTests(ResourceProfileTestsMixin, ReusedConnectTestCase):
     @classmethod
     def master(cls):
         return os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local-cluster[1, 4, 1024]")
diff --git a/python/pyspark/sql/tests/connect/test_session.py b/python/pyspark/sql/tests/connect/test_session.py
deleted file mode 100644
index 385dcbd218ef2..0000000000000
--- a/python/pyspark/sql/tests/connect/test_session.py
+++ /dev/null
@@ -1,106 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import unittest
-from typing import Optional
-
-from pyspark.testing.connectutils import (
-    should_test_connect,
-    connect_requirement_message,
-)
-
-if should_test_connect:
-    from pyspark.sql.connect.client import DefaultChannelBuilder
-    from pyspark.sql.connect.session import SparkSession as RemoteSparkSession
-
-    class CustomChannelBuilder(DefaultChannelBuilder):
-        @property
-        def userId(self) -> Optional[str]:
-            return "abc"
-
-
-@unittest.skipIf(not should_test_connect, connect_requirement_message)
-class SparkSessionTestCase(unittest.TestCase):
-    def test_fails_to_create_session_without_remote_and_channel_builder(self):
-        with self.assertRaises(ValueError):
-            RemoteSparkSession.builder.getOrCreate()
-
-    def test_fails_to_create_when_both_remote_and_channel_builder_are_specified(self):
-        with self.assertRaises(ValueError):
-            (
-                RemoteSparkSession.builder.channelBuilder(CustomChannelBuilder("sc://localhost"))
-                .remote("sc://localhost")
-                .getOrCreate()
-            )
-
-    def test_creates_session_with_channel_builder(self):
-        test_session = RemoteSparkSession.builder.channelBuilder(
-            CustomChannelBuilder("sc://other")
-        ).getOrCreate()
-        host = test_session.client.host
-        test_session.stop()
-
-        self.assertEqual("other", host)
-
-    def test_creates_session_with_remote(self):
-        test_session = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-        host = test_session.client.host
-        test_session.stop()
-
-        self.assertEqual("other", host)
-
-    def test_session_stop(self):
-        session = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-
-        self.assertFalse(session.is_stopped)
-        session.stop()
-        self.assertTrue(session.is_stopped)
-
-    def test_session_create_sets_active_session(self):
-        session = RemoteSparkSession.builder.remote("sc://abc").create()
-        session2 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-
-        self.assertIs(session, session2)
-        session.stop()
-
-    def test_active_session_expires_when_client_closes(self):
-        s1 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-        s2 = RemoteSparkSession.getActiveSession()
-
-        self.assertIs(s1, s2)
-
-        # We don't call close() to avoid executing ExecutePlanResponseReattachableIterator
-        s1._client._closed = True
-
-        self.assertIsNone(RemoteSparkSession.getActiveSession())
-        s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-
-        self.assertIsNot(s1, s3)
-
-    def test_default_session_expires_when_client_closes(self):
-        s1 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-        s2 = RemoteSparkSession.getDefaultSession()
-
-        self.assertIs(s1, s2)
-
-        # We don't call close() to avoid executing ExecutePlanResponseReattachableIterator
-        s1._client._closed = True
-
-        self.assertIsNone(RemoteSparkSession.getDefaultSession())
-        s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate()
-
-        self.assertIsNot(s1, s3)
diff --git a/python/pyspark/sql/tests/connect/test_utils.py b/python/pyspark/sql/tests/connect/test_utils.py
index bd7f2f7df2da8..04531a588f732 100644
--- a/python/pyspark/sql/tests/connect/test_utils.py
+++ b/python/pyspark/sql/tests/connect/test_utils.py
@@ -19,7 +19,7 @@
 from pyspark.sql.tests.test_utils import UtilsTestsMixin
 
 
-class ConnectUtilsTests(ReusedConnectTestCase, UtilsTestsMixin):
+class ConnectUtilsTests(UtilsTestsMixin, ReusedConnectTestCase):
     pass
 
 
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py
index 750ed101cc363..bfcedc6c8899f 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py
@@ -541,7 +541,7 @@ def func_with_logging(iterator):
             )
 
 
-class MapInPandasTests(ReusedSQLTestCase, MapInPandasTestsMixin):
+class MapInPandasTests(MapInPandasTestsMixin, ReusedSQLTestCase):
     @classmethod
     def setUpClass(cls):
         ReusedSQLTestCase.setUpClass()
diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py
index 92ffea233215f..d832a9ffa7d04 100644
--- a/python/pyspark/sql/tests/test_catalog.py
+++ b/python/pyspark/sql/tests/test_catalog.py
@@ -588,6 +588,47 @@ def test_catalog_analyze_table(self):
             spark.sql(f"INSERT INTO {t} VALUES (1)")
             spark.catalog.analyzeTable(t, noScan=True)
 
+    def test_path_current_path_disabled(self):
+        # current_path() is a regular builtin and resolves even when
+        # spark.sql.path.enabled is false. The DataFrame and SQL surfaces must agree.
+        from pyspark.sql.functions import current_path
+
+        spark = self.spark
+        with self.sql_conf({"spark.sql.path.enabled": False}):
+            sql_form = spark.sql("SELECT current_path()").collect()[0][0]
+            self.assertIsInstance(sql_form, str)
+            self.assertNotEqual(sql_form, "")
+            api_form = spark.range(1).select(current_path()).collect()[0][0]
+            self.assertEqual(sql_form, api_form)
+
+    def test_path_set_path_and_current_path(self):
+        # SET PATH is parsed and applied; current_path() reflects it
+        # over both the SQL and DataFrame surfaces. Restores DEFAULT_PATH on exit.
+        from pyspark.sql.functions import current_path
+
+        spark = self.spark
+        with self.sql_conf({"spark.sql.path.enabled": True}):
+            try:
+                spark.sql("SET PATH = spark_catalog.default, system.builtin")
+                sql_form = spark.sql("SELECT current_path()").collect()[0][0]
+                self.assertEqual(sql_form, "spark_catalog.default,system.builtin")
+                api_form = spark.range(1).select(current_path()).collect()[0][0]
+                self.assertEqual(sql_form, api_form)
+            finally:
+                spark.sql("SET PATH = DEFAULT_PATH")
+
+    def test_path_set_path_rejected_when_disabled(self):
+        # SET PATH must raise UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED
+        # when the feature flag is off (covers both classic and Connect error paths).
+        spark = self.spark
+        with self.sql_conf({"spark.sql.path.enabled": False}):
+            with self.assertRaises(AnalysisException) as ctx:
+                spark.sql("SET PATH = spark_catalog.default")
+            self.assertEqual(
+                ctx.exception.getCondition(),
+                "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED",
+            )
+
 
 class CatalogTests(CatalogTestsMixin, ReusedSQLTestCase):
     pass
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py
index 06359854c6d51..6a99c7de1a52d 100644
--- a/python/pyspark/sql/tests/test_column.py
+++ b/python/pyspark/sql/tests/test_column.py
@@ -20,10 +20,12 @@
 from itertools import chain
 import datetime
 import unittest
+import uuid
 
 from pyspark.sql import Column, Row
 from pyspark.sql import functions as sf
-from pyspark.sql.types import StructType, StructField, IntegerType, LongType
+from pyspark.sql.window import Window
+from pyspark.sql.types import StructType, StructField, IntegerType, LongType, StringType
 from pyspark.errors import AnalysisException, PySparkTypeError, PySparkValueError
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 from pyspark.testing.utils import have_pandas, pandas_requirement_message
@@ -558,6 +560,33 @@ def test_select_join_keys(self):
             self.assertTrue(df1.join(df2, "id", how).select(df1["id"]).count() >= 0, how)
             self.assertTrue(df1.join(df2, "id", how).select(df2["id"]).count() >= 0, how)
 
+    def test_select_regular_column_with_reused_dataframe_hidden_in_natural_join(self):
+        # A DataFrame appears both as a direct join side and inside a natural/USING
+        # join that hides one of its columns into `metadataOutput`. When resolving
+        # `df2["id"]`, two candidates match the plan id: one from `p.output` (the
+        # direct join side) and one only visible via `p.metadataOutput` (the reused
+        # `df2` nested under the USING-join wrapper). We should prefer the regular
+        # candidate and not throw AMBIGUOUS_COLUMN_REFERENCE.
+        df1 = self.spark.createDataFrame([(10, "T1"), (20, "T2")], ["key", "val"])
+        df2 = self.spark.createDataFrame([(10,), (20,), (30,)], ["id"])
+        # The second row's id (99) does not match any df2 row, so the USING
+        # left-join in `enriched` produces NULL on the df2 side for val "T2".
+        # If `df2["id"]` were resolved to the hidden (USING-wrapper) candidate,
+        # the second row would yield NULL instead of 20, and the assertion below
+        # would fail. This pins resolution to the direct-side `id`.
+        df3 = self.spark.createDataFrame([(10, "T1"), (99, "T2")], ["id", "val"])
+        enriched = df3.join(df2, "id", "left")
+        result = (
+            df1.join(df2, df1["key"] == df2["id"], "left")
+            .join(enriched, "val", "full_outer")
+            .sort("val")
+            .select(df2["id"])
+        )
+        self.assertEqual(
+            [r["id"] for r in result.collect()],
+            [10, 20],
+        )
+
     def test_drop_notexistent_col(self):
         df1 = self.spark.createDataFrame(
             [("a", "b", "c")],
@@ -578,6 +607,388 @@ def test_drop_notexistent_col(self):
         self.assertEqual(df4.columns, ["colA", "colB", "colC", "colC", "colD", "colE"])
         self.assertEqual(df4.count(), 1)
 
+    # --- Mixed-surface layered DataFrame programs ---------------------------
+    #
+    # These tests chain multiple DataFrame transformations - semi-joins
+    # (for SQL EXISTS/IN), window functions, cube aggregations, UDFs and
+    # struct field access - into 4-5 layer pipelines, then reference the
+    # final layered DataFrame's columns via ``layered.col`` in both filter
+    # and select at the outermost surface. The goal is to catch regressions
+    # in plan-id propagation across analyzer rules that single-operator
+    # tests miss when rules interact.
+
+    def test_layered_semijoin_groupby_window(self):
+        # 4-layer DataFrame pipeline: filter -> semi-join -> groupBy/agg
+        # -> window functions. ``layered.col`` references appear in both
+        # filter and select at the outermost surface.
+        events_data = [
+            (1, 1, "Books", 100.0, 2, True),
+            (2, 1, "Books", 50.0, 3, True),
+            (3, 2, "Electronics", 200.0, 1, True),
+            (4, 2, "Electronics", 300.0, 2, True),
+            (5, 3, "Home", 80.0, 4, True),
+            (6, 4, "Books", 60.0, 1, False),
+        ]
+        users_data = [(1, 25), (2, 30), (3, 22), (4, 18)]
+        events_cols = ["id", "user_id", "category", "amount", "quantity", "is_active"]
+        users_cols = ["id", "age"]
+
+        events = self.spark.createDataFrame(events_data, events_cols)
+        users = self.spark.createDataFrame(users_data, users_cols)
+        # Layer 1: filter + semi-join (DataFrame-API equivalent of
+        # WHERE is_active AND EXISTS (user with age > 20)).
+        active = events.where(events.is_active).join(
+            users.where(users.age > 20),
+            events.user_id == users.id,
+            "left_semi",
+        )
+        # Layer 2: groupBy + agg, then post-agg filter (HAVING equivalent).
+        agg = active.groupBy("category").agg(
+            sf.sum(active.amount * active.quantity * sf.lit(0.1)).alias("total_amt"),
+            sf.sum(active.amount).alias("amount_sum"),
+        )
+        totals = agg.where(agg.amount_sum > 50).select("category", "total_amt")
+        # Layer 3: window functions on top of the aggregate.
+        running = Window.orderBy("total_amt").rowsBetween(-1, 1)
+        ranking = Window.orderBy(totals.total_amt.desc())
+        windowed = totals.select(
+            "category",
+            "total_amt",
+            sf.avg(totals.total_amt).over(running).alias("running_avg"),
+            sf.rank().over(ranking).alias("rank_num"),
+        )
+        # Layer 4: outer filter.
+        layered = windowed.where(windowed.rank_num <= 5)
+
+        rows = (
+            layered.filter(layered.rank_num <= 3)
+            .select(
+                layered.category,
+                layered.total_amt,
+                layered.running_avg,
+                layered.rank_num,
+            )
+            .collect()
+        )
+        result = sorted((r.category, r.rank_num) for r in rows)
+        self.assertEqual(result, [("Books", 2), ("Electronics", 1), ("Home", 3)])
+
+    def test_layered_struct_semijoin_cube_ntile(self):
+        # 5-layer DataFrame pipeline: filter -> semi-join -> struct field
+        # access -> cube aggregation -> window NTILE. ``layered.col``
+        # references appear in both filter and select at the outermost
+        # surface.
+        events_schema = StructType(
+            [
+                StructField("id", IntegerType()),
+                StructField("category", StringType()),
+                StructField("status", StringType()),
+                StructField("amount", IntegerType()),
+                StructField("quantity", IntegerType()),
+                StructField(
+                    "detail",
+                    StructType(
+                        [
+                            StructField("name", StringType()),
+                            StructField("nested", StructType([StructField("x", IntegerType())])),
+                        ]
+                    ),
+                ),
+            ]
+        )
+        events_data = [
+            (1, "Books", "A", 100, 5, ("alpha", (1,))),
+            (2, "Electronics", "B", 200, 3, ("beta", (2,))),
+            (3, "Books", "A", 50, 7, ("alpha", (1,))),
+            (4, "Electronics", "B", 300, 4, ("beta", (2,))),
+            (5, "Home", "C", 80, 2, ("gamma", (3,))),
+        ]
+        categories_data = [("Books", 1), ("Electronics", 2), ("Home", 3), ("Toys", 5)]
+        categories_cols = ["name", "priority"]
+
+        events = self.spark.createDataFrame(events_data, events_schema)
+        categories = self.spark.createDataFrame(categories_data, categories_cols)
+        # Layer 1: filter + semi-join (DataFrame-API equivalent of
+        # WHERE quantity > 1 AND category IN (SELECT ...)).
+        filtered = events.where(events.quantity > 1).join(
+            categories.where(categories.priority <= 3),
+            events.category == categories.name,
+            "left_semi",
+        )
+        # Layer 2: project with struct field access (struct subfields use
+        # bracket access since ``detail.name`` would hit ``Column.name``).
+        base = filtered.select(
+            filtered.id,
+            filtered.category,
+            filtered.status,
+            filtered.amount,
+            filtered.detail["name"].alias("detail_name"),
+            filtered.detail["nested"]["x"].alias("nx"),
+        )
+        # Layer 3: cube aggregation (mixed grouping levels - similar
+        # surface area to SQL GROUPING SETS without an exact equivalent
+        # in the DataFrame API).
+        agg = base.cube("category", "status", "detail_name").agg(
+            sf.sum(base.amount).alias("total"), sf.count(sf.lit(1)).alias("cnt")
+        )
+        grouped = agg.where(agg.category.isNotNull() & agg.status.isNotNull())
+        # Layer 4: NTILE window.
+        tiled = grouped.withColumn("tile", sf.ntile(2).over(Window.orderBy(grouped.total.desc())))
+        # Layer 5: outer filter.
+        layered = tiled.where(tiled.tile <= 2)
+
+        rows = (
+            layered.filter(layered.tile >= 1)
+            .select(
+                layered.category,
+                layered.status,
+                layered.detail_name,
+                layered.total,
+                layered.cnt,
+                layered.tile,
+            )
+            .collect()
+        )
+        # Cube emits one (category, status, detail_name) group per distinct
+        # combination plus one (category, status, NULL) subtotal per distinct
+        # (category, status) pair. The where filter keeps both.
+        self.assertEqual(len(rows), 6)
+        self.assertEqual({r.category for r in rows}, {"Books", "Electronics", "Home"})
+        self.assertEqual({r.total for r in rows}, {80, 150, 500})
+        self.assertEqual({r.tile for r in rows}, {1, 2})
+
+    def test_layered_window_window_udf(self):
+        # 4-layer DataFrame pipeline: filter -> running-total window ->
+        # per-partition max window -> UDF wrap. ``layered.col`` references
+        # appear in both filter and select at the outermost surface.
+        data = [
+            (1, "A", 100),
+            (2, "A", 200),
+            (3, "B", 150),
+            (4, "B", 250),
+            (5, "C", 50),
+        ]
+        cols = ["id", "category", "amount"]
+
+        df = self.spark.createDataFrame(data, cols)
+        # Layer 1: filter (replaces WHERE EXISTS amount > 0).
+        filtered = df.where(df.amount > 0)
+        # Layer 2: running total window.
+        run_w = Window.partitionBy("category").orderBy("id")
+        with_run = filtered.withColumn("run_amt", sf.sum(filtered.amount).over(run_w))
+        # Layer 3: per-category max window (replaces correlated subquery
+        # for cat_max).
+        cat_w = Window.partitionBy("category")
+        with_max = with_run.withColumn("cat_max", sf.max(with_run.amount).over(cat_w))
+        # Layer 4: UDF.
+        double = sf.udf(lambda x: x * 2 if x is not None else None, IntegerType())
+        layered = with_max.withColumn("doubled_amt", double(with_max.amount))
+
+        rows = (
+            layered.filter(layered.amount > 0)
+            .select(
+                layered.id,
+                layered.category,
+                layered.amount,
+                layered.run_amt,
+                layered.cat_max,
+                layered.doubled_amt,
+            )
+            .collect()
+        )
+        result = sorted(
+            (r.id, r.category, r.amount, r.run_amt, r.cat_max, r.doubled_amt) for r in rows
+        )
+        self.assertEqual(
+            result,
+            [
+                (1, "A", 100, 100, 200, 200),
+                (2, "A", 200, 300, 200, 400),
+                (3, "B", 150, 150, 250, 300),
+                (4, "B", 250, 400, 250, 500),
+                (5, "C", 50, 50, 50, 100),
+            ],
+        )
+
+    # --- Tagged DataFrame column resolution --------------------------------
+    #
+    # ``df.col`` / ``df["col"]`` carries the source DataFrame's plan id. These
+    # tests pin how that tagged reference resolves after assorted operators.
+    # The behavior is shared across Spark Classic and Spark Connect (both
+    # ``spark.sql.analyzer.strictDataFrameColumnResolution`` modes) except for
+    # a few diverging cases, which are overridden in the Connect parity suites
+    # (``ColumnParityTests`` / ``...WithNonStrictDFColResolution``):
+    #
+    #   * the shadowing trio - Classic and Connect strict raise, Connect
+    #     lenient resolves the shadowed name via name-based fallback;
+    #   * union - Classic resolves via attribute-id propagation, Connect
+    #     raises in both modes.
+
+    def test_resolve_after_chained_withcolumn_shadow(self):
+        # Two consecutive withColumn calls each shadow `c` with a new
+        # attribute of the same name, so the original `c` leaves the
+        # projection and the tagged `df.c` cannot resolve.
+        # Connect lenient diverges: name-based fallback resolves the
+        # shadowed name (overridden in the lenient parity suite).
+        df = self.spark.sql("SELECT 1 AS c")
+        with self.assertRaises(AnalysisException):
+            df.withColumn("c", sf.col("c").cast("string")).withColumn(
+                "c", sf.col("c").cast("int")
+            ).select(df.c).collect()
+
+    def test_resolve_after_select_alias_shadow(self):
+        # Same shadowing shape as withColumn but via select + alias.
+        # Connect lenient diverges: name-based fallback resolves the
+        # shadowed name (overridden in the lenient parity suite).
+        df = self.spark.sql("SELECT 1 AS c")
+        with self.assertRaises(AnalysisException):
+            df.select(df.c.cast("string").alias("c")).select(df.c).collect()
+
+    def test_resolve_after_withcolumnrenamed(self):
+        # withColumnRenamed drops the original `c` attribute and projects it
+        # as `c2`; the tagged `df.c` matches neither the original attribute
+        # nor a current column named `c`, so all modes raise.
+        df = self.spark.sql("SELECT 1 AS c")
+        with self.assertRaises(AnalysisException):
+            df.withColumnRenamed("c", "c2").select(df.c).collect()
+
+    def test_resolve_after_drop(self):
+        # drop("c") removes the column entirely; the tagged `df.c` cannot
+        # resolve under any mode.
+        df = self.spark.sql("SELECT 1 AS c, 2 AS d")
+        with self.assertRaises(AnalysisException):
+            df.drop("c").select(df.c).collect()
+
+    def test_resolve_through_filter(self):
+        # filter is a pass-through operator: the child Project's attributes
+        # flow through unchanged, so the tagged reference resolves.
+        df = self.spark.sql("SELECT 1 AS c UNION ALL SELECT 2 AS c")
+        rows = df.filter(df.c > 0).select(df.c).collect()
+        self.assertEqual(sorted(r.c for r in rows), [1, 2])
+
+    def test_resolve_through_sort(self):
+        # sort is also a pass-through operator.
+        df = self.spark.sql("SELECT 2 AS c UNION ALL SELECT 1 AS c")
+        rows = df.sort(df.c).select(df.c).collect()
+        self.assertEqual([r.c for r in rows], [1, 2])
+
+    def test_resolve_through_distinct(self):
+        # distinct preserves attribute identity for column resolution.
+        df = self.spark.sql("SELECT 1 AS c UNION ALL SELECT 1 AS c")
+        rows = df.distinct().select(df.c).collect()
+        self.assertEqual([r.c for r in rows], [1])
+
+    def test_resolve_after_groupby_count(self):
+        # groupBy("c").count() preserves the grouping key's attribute id, so
+        # the tagged reference resolves.
+        df = self.spark.sql("SELECT 1 AS c UNION ALL SELECT 1 AS c UNION ALL SELECT 2 AS c")
+        rows = df.groupBy("c").count().select(df.c).collect()
+        self.assertEqual(sorted(r.c for r in rows), [1, 2])
+
+    def test_resolve_after_agg_alias_shadow(self):
+        # An aggregate output aliased `c` collides by name with the source
+        # `c`, but the tagged `df.c` still references the aggregated-away
+        # source attribute, so it cannot resolve.
+        # Connect lenient diverges: name-based fallback resolves the
+        # aliased name (overridden in the lenient parity suite).
+        df = self.spark.sql("SELECT 1 AS c")
+        with self.assertRaises(AnalysisException):
+            df.groupBy().agg(sf.sum("c").alias("c")).select(df.c).collect()
+
+    def test_resolve_after_pivot(self):
+        # pivot preserves the grouping key's attribute id, so the tagged
+        # reference resolves.
+        df = self.spark.sql(
+            "SELECT 1 AS c, 'a' AS k, 10 AS v UNION ALL SELECT 2 AS c, 'b' AS k, 20 AS v"
+        )
+        rows = df.groupBy("c").pivot("k").sum("v").select(df.c).collect()
+        self.assertEqual(sorted(r.c for r in rows), [1, 2])
+
+    def test_resolve_after_union(self):
+        # Union's output keeps the left child's attribute ids
+        # (Union.mergeChildOutputs), so Classic resolves the tagged
+        # left-side reference directly against that output and succeeds.
+        # Connect resolves by walking the plan tree for the plan id but
+        # treats Union as a leaf (ColumnResolutionHelper), so the id below
+        # the Union is never found and it raises in both modes (overridden
+        # there).
+        df1 = self.spark.sql("SELECT 1 AS c")
+        df2 = self.spark.sql("SELECT 2 AS c")
+        rows = df1.union(df2).select(df1.c).collect()
+        self.assertEqual(sorted(r.c for r in rows), [1, 2])
+
+    def test_resolve_after_intersect(self):
+        # Intersect's output also keeps the left child's attribute ids
+        # (Intersect.mergeChildOutputs). Unlike Union, it is not treated as
+        # a leaf during plan-id resolution, so Connect's tree walk descends
+        # into the left child, finds the tagged node and resolves it; all
+        # modes succeed.
+        df1 = self.spark.sql("SELECT 1 AS c UNION ALL SELECT 2 AS c")
+        df2 = self.spark.sql("SELECT 2 AS c UNION ALL SELECT 3 AS c")
+        rows = df1.intersect(df2).select(df1.c).collect()
+        self.assertEqual([r.c for r in rows], [2])
+
+    def test_resolve_self_join_alias(self):
+        # Both self-join sides originate from the same plan-id-tagged
+        # ancestor, yielding two equal-depth candidates with the same
+        # attribute id. Disambiguation cannot tiebreak and all modes raise
+        # an ambiguous-reference error.
+        df = self.spark.sql("SELECT 1 AS c UNION ALL SELECT 2 AS c")
+        a, b = df.alias("a"), df.alias("b")
+        with self.assertRaises(AnalysisException):
+            a.join(b, a.c == b.c).select(df.c).collect()
+
+    def test_resolve_after_subquery_view(self):
+        # Persisting the DataFrame as a temp view and reading it back via
+        # table() produces a new plan; the tagged reference still resolves in
+        # all modes.
+        view = f"v_{uuid.uuid4().hex}"
+        df = self.spark.sql("SELECT 1 AS c")
+        df.createOrReplaceTempView(view)
+        try:
+            rows = self.spark.table(view).select(df.c).collect()
+            self.assertEqual([r.c for r in rows], [1])
+        finally:
+            self.spark.sql(f"DROP VIEW IF EXISTS {view}")
+
+    def test_resolve_cross_dataframe_illegal_reference(self):
+        # Referencing a column from a DataFrame whose plan id is not an
+        # ancestor of the target plan (`df1.select(df2.id)`) fails in all
+        # modes; the strict / lenient switch does not gate this throw.
+        df1 = self.spark.range(3)
+        df2 = self.spark.range(5)
+        with self.assertRaises(AnalysisException):
+            df1.select(df2.id).collect()
+
+    def test_resolve_df_star(self):
+        # `df["*"]` is an UnresolvedDataFrameStar carrying df's plan id; the
+        # analyzer expands it to the matched node's output in all modes.
+        df = self.spark.sql(
+            "SELECT 'Books' AS c, 100 AS v UNION ALL SELECT 'Electronics' AS c, 200 AS v"
+        )
+        rows = df.select(df["*"]).collect()
+        self.assertEqual(sorted((r.c, r.v) for r in rows), [("Books", 100), ("Electronics", 200)])
+
+    def test_resolve_self_join_withcolumnrenamed(self):
+        # Documented ColumnResolutionHelper case: df1 = range(10) + col `a`;
+        # df2 = df1 renamed `a` -> `b`; df1.join(df2, df1.a == df2.b). The
+        # node with df1's plan id is found on both Join sides; the right
+        # candidate is filtered out because its `a` is not in the renaming
+        # Project's output, so disambiguation succeeds in all modes.
+        df1 = self.spark.range(10).withColumn("a", sf.col("id"))
+        df2 = df1.withColumnRenamed("a", "b")
+        rows = df1.join(df2, df1.a == df2.b).select(df1.a, df2.b).collect()
+        self.assertEqual(len(rows), 10)
+
+    def test_resolve_sort_missing_attr_recovery(self):
+        # Documented ColumnResolutionHelper case: df.select(df.v).sort(df.id)
+        # where df.id is not in the select's output. The analyzer descends
+        # through the Project, resolves df.id via plan id at the source, and
+        # adds it back to the upstream projection. Works in all modes.
+        df = self.spark.range(10).withColumn("v", sf.col("id") + 1)
+        rows = df.select(df.v).sort(df.id).collect()
+        self.assertEqual(len(rows), 10)
+
 
 class ColumnTests(ColumnTestsMixin, ReusedSQLTestCase):
     pass
diff --git a/python/pyspark/sql/tests/test_errors.py b/python/pyspark/sql/tests/test_errors.py
index 8bc029cd54390..ce8aeb4499844 100644
--- a/python/pyspark/sql/tests/test_errors.py
+++ b/python/pyspark/sql/tests/test_errors.py
@@ -52,7 +52,7 @@ def test_spark_runtime_exception(self):
                 self.spark.sql("select cast('abc' as boolean)").show()
 
 
-class ErrorsTests(ReusedSQLTestCase, ErrorsTestsMixin):
+class ErrorsTests(ErrorsTestsMixin, ReusedSQLTestCase):
     pass
 
 
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 0562070379b3b..75824d3ebe491 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -3431,6 +3431,7 @@ def check(resultDf, expected):
             self.assertEqual([r[0] for r in resultDf.collect()], expected)
 
         check(df.select(F.is_variant_null(v)), [False, False])
+        check(df.select(F.is_valid_variant(v)), [True, True])
         check(df.select(F.schema_of_variant(v)), ["OBJECT<a: BIGINT>", "OBJECT<b: BIGINT>"])
         check(df.select(F.schema_of_variant_agg(v)), ["OBJECT<a: BIGINT, b: BIGINT>"])
 
@@ -3847,16 +3848,28 @@ def test_string_validation(self):
 
     def test_st_asbinary(self):
         df = self.spark.createDataFrame(
-            [(bytes.fromhex("0101000000000000000000F03F0000000000000040"),)],
-            ["wkb"],
+            [(bytes.fromhex("0101000000000000000000F03F0000000000000040"), "XDR")],
+            ["wkb", "end"],
         )
         results = df.select(
             F.hex(F.st_asbinary(F.st_geogfromwkb("wkb"))),
+            F.hex(F.st_asbinary(F.st_geogfromwkb("wkb"), "NDR")),
+            F.hex(F.st_asbinary(F.st_geogfromwkb("wkb"), "XDR")),
+            F.hex(F.st_asbinary(F.st_geogfromwkb("wkb"), F.col("end"))),
             F.hex(F.st_asbinary(F.st_geomfromwkb("wkb"))),
+            F.hex(F.st_asbinary(F.st_geomfromwkb("wkb"), "NDR")),
+            F.hex(F.st_asbinary(F.st_geomfromwkb("wkb"), "XDR")),
+            F.hex(F.st_asbinary(F.st_geomfromwkb("wkb"), F.col("end"))),
         ).collect()
         expected = Row(
             "0101000000000000000000F03F0000000000000040",
             "0101000000000000000000F03F0000000000000040",
+            "00000000013FF00000000000004000000000000000",
+            "00000000013FF00000000000004000000000000000",
+            "0101000000000000000000F03F0000000000000040",
+            "0101000000000000000000F03F0000000000000040",
+            "00000000013FF00000000000004000000000000000",
+            "00000000013FF00000000000004000000000000000",
         )
         self.assertEqual(results, [expected])
 
@@ -3981,7 +3994,7 @@ def test_max_by_min_by_with_k(self):
         self.assertEqual(result[1][1], ["Frank", "Dave"])  # Sales
 
 
-class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin):
+class FunctionsTests(FunctionsTestsMixin, ReusedSQLTestCase):
     pass
 
 
diff --git a/python/pyspark/sql/tests/test_nearest_by_join.py b/python/pyspark/sql/tests/test_nearest_by_join.py
new file mode 100644
index 0000000000000..5e5236c213fbd
--- /dev/null
+++ b/python/pyspark/sql/tests/test_nearest_by_join.py
@@ -0,0 +1,263 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from pyspark.errors import AnalysisException
+from pyspark.sql import Row
+from pyspark.sql import functions as sf
+from pyspark.testing import assertDataFrameEqual
+from pyspark.testing.sqlutils import ReusedSQLTestCase
+
+
+class NearestByJoinTestsMixin:
+    """Mixin run against both classic (`ReusedSQLTestCase`) and Connect
+    (`ReusedConnectTestCase`) to ensure parity between the two paths."""
+
+    @property
+    def users(self):
+        return self.spark.createDataFrame([(1, 10.0), (2, 20.0), (3, 30.0)], ["user_id", "score"])
+
+    @property
+    def products(self):
+        return self.spark.createDataFrame(
+            [("A", 11.0), ("B", 22.0), ("C", 5.0)], ["product", "pscore"]
+        )
+
+    def test_inner_similarity_k1(self):
+        users, products = self.users, self.products
+        result = (
+            users.nearestByJoin(
+                products,
+                -sf.abs(users.score - products.pscore),
+                numResults=1,
+                mode="approx",
+                direction="similarity",
+            )
+            .select("user_id", "product")
+            .orderBy("user_id")
+        )
+        assertDataFrameEqual(
+            result,
+            [Row(user_id=1, product="A"), Row(user_id=2, product="B"), Row(user_id=3, product="B")],
+        )
+
+    def test_inner_distance_k2(self):
+        users, products = self.users, self.products
+        result = (
+            users.nearestByJoin(
+                products,
+                sf.abs(users.score - products.pscore),
+                numResults=2,
+                mode="approx",
+                direction="distance",
+            )
+            .select("user_id", "product")
+            .orderBy("user_id", "product")
+        )
+        assertDataFrameEqual(
+            result,
+            [
+                Row(user_id=1, product="A"),
+                Row(user_id=1, product="C"),
+                Row(user_id=2, product="A"),
+                Row(user_id=2, product="B"),
+                Row(user_id=3, product="A"),
+                Row(user_id=3, product="B"),
+            ],
+        )
+
+    def test_left_outer_with_empty_right(self):
+        users, products = self.users, self.products
+        empty = products.filter(sf.lit(False))
+        result = (
+            users.nearestByJoin(
+                empty,
+                -sf.abs(users.score - empty.pscore),
+                numResults=1,
+                mode="exact",
+                direction="similarity",
+                joinType="leftouter",
+            )
+            .select("user_id", "product")
+            .orderBy("user_id")
+        )
+        assertDataFrameEqual(
+            result,
+            [
+                Row(user_id=1, product=None),
+                Row(user_id=2, product=None),
+                Row(user_id=3, product=None),
+            ],
+        )
+
+    def test_select_star_schema_has_no_internal_columns(self):
+        users, products = self.users, self.products
+        result = users.nearestByJoin(
+            products,
+            -sf.abs(users.score - products.pscore),
+            numResults=1,
+            mode="exact",
+            direction="similarity",
+        )
+        # No `__qid`, `__nearest_matches__`, or other rewrite-internal columns leak through.
+        assert sorted(result.columns) == ["product", "pscore", "score", "user_id"]
+
+    def test_invalid_num_results_low(self):
+        users, products = self.users, self.products
+        with self.assertRaises(AnalysisException) as pe:
+            users.nearestByJoin(
+                products,
+                -sf.abs(users.score - products.pscore),
+                numResults=0,
+                mode="approx",
+                direction="similarity",
+            )
+        self.check_error(
+            exception=pe.exception,
+            errorClass="NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+            messageParameters={"numResults": "0", "min": "1", "max": "100000"},
+        )
+
+    def test_invalid_num_results_high(self):
+        users, products = self.users, self.products
+        with self.assertRaises(AnalysisException) as pe:
+            users.nearestByJoin(
+                products,
+                -sf.abs(users.score - products.pscore),
+                numResults=200000,
+                mode="approx",
+                direction="similarity",
+            )
+        self.check_error(
+            exception=pe.exception,
+            errorClass="NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+            messageParameters={"numResults": "200000", "min": "1", "max": "100000"},
+        )
+
+    def test_invalid_join_type(self):
+        users, products = self.users, self.products
+        with self.assertRaises(AnalysisException) as pe:
+            users.nearestByJoin(
+                products,
+                -sf.abs(users.score - products.pscore),
+                numResults=1,
+                mode="approx",
+                direction="similarity",
+                joinType="outer",
+            )
+        self.check_error(
+            exception=pe.exception,
+            errorClass="NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+            messageParameters={"joinType": "outer", "supported": "'INNER', 'LEFT OUTER'"},
+        )
+
+    def test_invalid_mode(self):
+        users, products = self.users, self.products
+        with self.assertRaises(AnalysisException) as pe:
+            users.nearestByJoin(
+                products,
+                -sf.abs(users.score - products.pscore),
+                numResults=1,
+                mode="bogus",
+                direction="similarity",
+            )
+        self.check_error(
+            exception=pe.exception,
+            errorClass="NEAREST_BY_JOIN.UNSUPPORTED_MODE",
+            messageParameters={"mode": "bogus", "supported": "'approx', 'exact'"},
+        )
+
+    def test_invalid_direction(self):
+        users, products = self.users, self.products
+        with self.assertRaises(AnalysisException) as pe:
+            users.nearestByJoin(
+                products,
+                -sf.abs(users.score - products.pscore),
+                numResults=1,
+                mode="approx",
+                direction="elsewhere",
+            )
+        self.check_error(
+            exception=pe.exception,
+            errorClass="NEAREST_BY_JOIN.UNSUPPORTED_DIRECTION",
+            messageParameters={
+                "direction": "elsewhere",
+                "supported": "'distance', 'similarity'",
+            },
+        )
+
+    def test_rejected_when_crossjoin_disabled(self):
+        users, products = self.users, self.products
+        with self.sql_conf({"spark.sql.crossJoin.enabled": "false"}):
+            with self.assertRaises(AnalysisException) as pe:
+                users.nearestByJoin(
+                    products,
+                    -sf.abs(users.score - products.pscore),
+                    numResults=1,
+                    mode="exact",
+                    direction="similarity",
+                ).collect()
+            self.check_error(
+                exception=pe.exception,
+                errorClass="NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED",
+                messageParameters={},
+            )
+
+    def test_exact_with_nondeterministic_ranking_accepted(self):
+        users, products = self.users, self.products
+        # Result rows are nondeterministic; only assert that each left row gets exactly one match.
+        count = users.nearestByJoin(
+            products,
+            sf.rand(0) + products.pscore,
+            numResults=1,
+            mode="exact",
+            direction="similarity",
+        ).count()
+        self.assertEqual(count, 3)
+
+    def test_streaming_inputs_rejected(self):
+        streaming_users = (
+            self.spark.readStream.format("rate")
+            .option("rowsPerSecond", 1)
+            .load()
+            .selectExpr("CAST(value AS INT) AS user_id", "CAST(value AS DOUBLE) AS score")
+        )
+        products = self.products
+        with self.assertRaises(AnalysisException) as pe:
+            # `.schema` forces analysis without starting the streaming query.
+            _ = streaming_users.nearestByJoin(
+                products,
+                -sf.abs(streaming_users.score - products.pscore),
+                numResults=1,
+                mode="exact",
+                direction="similarity",
+            ).schema
+        self.check_error(
+            exception=pe.exception,
+            errorClass="NEAREST_BY_JOIN.STREAMING_NOT_SUPPORTED",
+            messageParameters={},
+        )
+
+
+class NearestByJoinTests(NearestByJoinTestsMixin, ReusedSQLTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.testing import main
+
+    main()
diff --git a/python/pyspark/sql/tests/test_resources.py b/python/pyspark/sql/tests/test_resources.py
index 3adb33a9904b7..1e9e9f796b5f1 100644
--- a/python/pyspark/sql/tests/test_resources.py
+++ b/python/pyspark/sql/tests/test_resources.py
@@ -32,7 +32,7 @@
     not have_pandas or not have_pyarrow,
     pandas_requirement_message or pyarrow_requirement_message,
 )
-class ResourceProfileTestsMixin(object):
+class ResourceProfileTestsMixin:
     def test_map_in_arrow_without_profile(self):
         def func(iterator):
             tc = TaskContext.get()
diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py
index 3606056f6793d..fb86deb33a2da 100644
--- a/python/pyspark/sql/tests/test_session.py
+++ b/python/pyspark/sql/tests/test_session.py
@@ -616,6 +616,32 @@ def test_create_sessions_share_spark_context(self):
         finally:
             session2.stop()
 
+    def test_create_does_not_construct_spark_conf_when_session_exists(self):
+        """Ensure SparkConf() is not called when a valid session already exists."""
+        self.session = self._get_builder().create()
+        with unittest.mock.patch("pyspark.sql.session.SparkConf") as mock_spark_conf:
+            session2 = self._get_builder().create()
+            try:
+                mock_spark_conf.assert_not_called()
+                self.assertIs(session2.sparkContext, self.session.sparkContext)
+            finally:
+                session2.stop()
+
+    def test_create_applies_mutable_conf_to_second_session(self):
+        """
+        Ensure that mutable SQL configs passed to create() are applied per-session
+        even when a valid SparkSession already exists.
+        """
+        key = "spark.sql.shuffle.partitions"
+        self.session = self._get_builder().config(key, "5").create()
+        self.assertEqual(self.session.conf.get(key), "5")
+        session2 = self._get_builder().config(key, "7").create()
+        try:
+            self.assertEqual(session2.conf.get(key), "7")
+            self.assertIs(session2.sparkContext, self.session.sparkContext)
+        finally:
+            session2.stop()
+
 
 class SparkSessionProfileTests(unittest.TestCase, PySparkErrorTestUtils):
     def setUp(self):
diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 26a6e169f74f3..826c66d5a306d 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -67,6 +67,7 @@
     GeographyType,
     GeometryType,
     NullType,
+    UserDefinedType,
     VariantType,
     VariantVal,
     _create_row,
@@ -1060,6 +1061,16 @@ def check_datatype(datatype):
         _make_type_verifier(PythonOnlyUDT())(PythonOnlyPoint(1.0, 2.0))
         self.assertRaises(ValueError, lambda: _make_type_verifier(PythonOnlyUDT())([1.0, 2.0]))
 
+    def test_udt_from_json_import_type_mismatch(self):
+        json = {
+            "type": "udt",
+            "pyClass": "random.random",
+            "serializedClass": "",
+            "sqlType": StringType().jsonValue(),
+        }
+        with self.assertRaises(PySparkTypeError):
+            UserDefinedType.fromJson(json)
+
     def test_simple_udt_in_df(self):
         schema = StructType().add("key", LongType()).add("val", PythonOnlyUDT())
         df = self.spark.createDataFrame(
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index 42a24d08b5ec1..ce7a42469e18b 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -58,7 +58,7 @@
 from pyspark.util import is_remote_only
 
 
-class BaseUDFTestsMixin(object):
+class BaseUDFTestsMixin:
     def test_udf_with_callable(self):
         data = self.spark.createDataFrame([(i, i**2) for i in range(10)], ["number", "squared"])
 
diff --git a/python/pyspark/sql/tests/test_udf_profiler.py b/python/pyspark/sql/tests/test_udf_profiler.py
index 2900860d43d2c..feda3958224ca 100644
--- a/python/pyspark/sql/tests/test_udf_profiler.py
+++ b/python/pyspark/sql/tests/test_udf_profiler.py
@@ -300,6 +300,19 @@ def add1(x):
             for id in self.profile_results:
                 self.assert_udf_profile_present(udf_id=id, expected_line_count_prefix=10)
 
+    def test_perf_profiler_udf_without_module(self):
+        @udf("long")
+        def add1(x):
+            return x + 1
+
+        add1.__module__ = None
+
+        with self.sql_conf({"spark.sql.pyspark.udf.profiler": "perf"}):
+            df = self.spark.range(10, numPartitions=2).select(add1("id"))
+            df.collect()
+
+        self.assertEqual(1, len(self.profile_results), str(self.profile_results.keys()))
+
     @unittest.skipIf(
         not have_pandas or not have_pyarrow,
         pandas_requirement_message or pyarrow_requirement_message,
diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py
index e9fb2b7c01189..3454a5f8b66c7 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -1872,7 +1872,7 @@ def test_assert_schema_equal_with_decimal_types(self):
             assertSchemaEqual(s1, s2)
 
 
-class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin):
+class UtilsTests(UtilsTestsMixin, ReusedSQLTestCase):
     pass
 
 
diff --git a/python/pyspark/sql/tvf.py b/python/pyspark/sql/tvf.py
index 97d8ce59913be..90874578a3562 100644
--- a/python/pyspark/sql/tvf.py
+++ b/python/pyspark/sql/tvf.py
@@ -550,12 +550,8 @@ def sql_keywords(self) -> DataFrame:
 
         Examples
         --------
-        >>> spark.tvf.sql_keywords().show()
-        +-------------+--------+
-        |      keyword|reserved|
-        +-------------+--------+
-        ...
-        +-------------+--------+...
+        >>> spark.tvf.sql_keywords().columns
+        ['keyword', 'reserved']
         """
         return self._fn("sql_keywords")
 
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 02a1af77906d8..6b9020b3b1041 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1996,6 +1996,11 @@ def fromJson(cls, json: Dict[str, Any]) -> "UserDefinedType":
             )
         else:
             UDT = getattr(m, pyClass)
+            if not (isinstance(UDT, type) and issubclass(UDT, UserDefinedType)):
+                raise PySparkTypeError(
+                    errorClass="FIELD_TYPE_MISMATCH",
+                    messageParameters={"obj": str(UDT), "data_type": "UserDefinedType"},
+                )
         return UDT()
 
 
diff --git a/python/pyspark/sql/worker/utils.py b/python/pyspark/sql/worker/utils.py
index 58f2dbb67f648..12bdb25e62529 100644
--- a/python/pyspark/sql/worker/utils.py
+++ b/python/pyspark/sql/worker/utils.py
@@ -24,7 +24,12 @@
     _deserialize_accumulator,
     SpecialAccumulatorIds,
 )
-from pyspark.sql.profiler import ProfileResultsParam, WorkerPerfProfiler, WorkerMemoryProfiler
+from pyspark.sql.profiler import (
+    ProfileResultsParam,
+    ProfileResultsParamV2,
+    WorkerPerfProfiler,
+    WorkerMemoryProfiler,
+)
 from pyspark.serializers import (
     read_int,
     write_int,
@@ -70,6 +75,10 @@ def worker_run(main: Callable, infile: IO, outfile: IO) -> None:
             SpecialAccumulatorIds.SQL_UDF_PROFIER, {}, ProfileResultsParam
         )
 
+        accumulator_v2 = _deserialize_accumulator(
+            SpecialAccumulatorIds.SQL_UDF_PROFIER_V2, {}, ProfileResultsParamV2
+        )
+
         if main.__module__ == "__main__":
             try:
                 worker_module = sys.modules["__main__"].__spec__.name  # type: ignore[union-attr]
@@ -80,10 +89,10 @@ def worker_run(main: Callable, infile: IO, outfile: IO) -> None:
         worker_module = worker_module.split(".")[-1]
 
         if conf.profiler == "perf":
-            with WorkerPerfProfiler(accumulator, worker_module):
+            with WorkerPerfProfiler(accumulator, accumulator_v2, worker_module):
                 main(infile, outfile)
         elif conf.profiler == "memory":
-            with WorkerMemoryProfiler(accumulator, worker_module, main):
+            with WorkerMemoryProfiler(accumulator, accumulator_v2, worker_module, main):
                 main(infile, outfile)
         else:
             main(infile, outfile)
diff --git a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py
index 8c1c7c3b1242e..e26a4cc83ee52 100644
--- a/python/pyspark/testing/mlutils.py
+++ b/python/pyspark/testing/mlutils.py
@@ -25,8 +25,7 @@
 from pyspark.ml.classification import Classifier, ClassificationModel
 from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
 from pyspark.ml.wrapper import _java2py
-from pyspark.sql import SparkSession
-from pyspark.sql.classic.dataframe import DataFrame
+from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.types import DoubleType
 from pyspark.testing.utils import ReusedPySparkTestCase as PySparkTestCase
 
@@ -100,6 +99,11 @@ def tearDownClass(cls):
 
 
 class MockDataset(DataFrame):
+    def __new__(cls, *args, **kwargs):
+        # DataFrame by default creates classic DataFrame, we need this to
+        # overwrite the default behavior.
+        return object.__new__(cls)
+
     def __init__(self):
         self.index = 0
 
diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py
index 9f8b425eb81b0..7c73e4db802f1 100644
--- a/python/pyspark/testing/pandasutils.py
+++ b/python/pyspark/testing/pandasutils.py
@@ -321,6 +321,17 @@ def compare_vals_approx(val1, val2):
 
 
 class PandasOnSparkTestUtils:
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        # Pin pandas display options so native pandas `repr` output is stable and never
+        # truncated, matching pandas-on-Spark's repr. PandasOnSparkTestCase sets these in
+        # its own setUpClass, but the Spark Connect parity tests mix in this class with
+        # ReusedConnectTestCase (which does not), so set them here to cover both paths.
+        pd.set_option("display.max_columns", None)  # never truncate columns
+        pd.set_option("display.expand_frame_repr", False)  # avoid line wrapping
+        pd.set_option("display.show_dimensions", False)  # hide [N rows x M cols]
+
     def convert_str_to_lambda(self, func: str):
         """
         This function converts `func` str to lambda call
diff --git a/python/pyspark/version.py b/python/pyspark/version.py
index 9443f6f452071..42e21847f472b 100644
--- a/python/pyspark/version.py
+++ b/python/pyspark/version.py
@@ -16,4 +16,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__: str = "4.2.0.1-4.3.0-0"
+__version__: str = "4.2.0.1+4.3.0.1"
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 95a7ccdc4f8dc..0da0e0ccc35c4 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -652,33 +652,6 @@ def verify_arrow_batch(batch, assign_cols_by_name, expected_cols_and_types):
     verify_arrow_result(batch, assign_cols_by_name, expected_cols_and_types)
 
 
-def wrap_grouped_map_pandas_udf(f, return_type, argspec, runner_conf):
-    def wrapped(key_series, value_series):
-        import pandas as pd
-
-        value_df = pd.concat(value_series, axis=1)
-
-        if len(argspec.args) == 1:
-            result = f(value_df)
-        elif len(argspec.args) == 2:
-            # Extract key from pandas Series, preserving numpy types
-            key = tuple(s.iloc[0] for s in key_series)
-            result = f(key, value_df)
-
-        verify_pandas_result(
-            result, return_type, runner_conf.assign_cols_by_name, truncate_return_schema=False
-        )
-
-        yield result
-
-    def flatten_wrapper(k, v):
-        # Return Iterator[[(df, spark_type)]] directly
-        for df in wrapped(k, v):
-            yield [(df, return_type)]
-
-    return flatten_wrapper
-
-
 def wrap_grouped_map_pandas_iter_udf(f, return_type, argspec, runner_conf):
     def wrapped(key_series, value_batches):
         import pandas as pd
@@ -1013,10 +986,13 @@ def _is_iter_based(eval_type: int) -> bool:
 
 
 def wrap_perf_profiler(f, eval_type, result_id):
-    from pyspark.sql.profiler import ProfileResultsParam, WorkerPerfProfiler
+    from pyspark.sql.profiler import ProfileResultsParam, ProfileResultsParamV2, WorkerPerfProfiler
 
     accumulator = _deserialize_accumulator(
-        SpecialAccumulatorIds.SQL_UDF_PROFIER, {}, ProfileResultsParam
+        SpecialAccumulatorIds.SQL_UDF_PROFIER, None, ProfileResultsParam
+    )
+    accumulator_v2 = _deserialize_accumulator(
+        SpecialAccumulatorIds.SQL_UDF_PROFIER_V2, {}, ProfileResultsParamV2
     )
 
     if _is_iter_based(eval_type):
@@ -1025,7 +1001,7 @@ def profiling_func(*args, **kwargs):
             iterator = iter(f(*args, **kwargs))
             while True:
                 try:
-                    with WorkerPerfProfiler(accumulator, result_id):
+                    with WorkerPerfProfiler(accumulator, accumulator_v2, result_id):
                         item = next(iterator)
                     yield item
                 except StopIteration:
@@ -1034,7 +1010,7 @@ def profiling_func(*args, **kwargs):
     else:
 
         def profiling_func(*args, **kwargs):
-            with WorkerPerfProfiler(accumulator, result_id):
+            with WorkerPerfProfiler(accumulator, accumulator_v2, result_id):
                 ret = f(*args, **kwargs)
             return ret
 
@@ -1042,7 +1018,11 @@ def profiling_func(*args, **kwargs):
 
 
 def wrap_memory_profiler(f, eval_type, result_id):
-    from pyspark.sql.profiler import ProfileResultsParam, WorkerMemoryProfiler
+    from pyspark.sql.profiler import (
+        ProfileResultsParam,
+        ProfileResultsParamV2,
+        WorkerMemoryProfiler,
+    )
 
     import pyspark.memory_profiler_ext
 
@@ -1050,7 +1030,11 @@ def wrap_memory_profiler(f, eval_type, result_id):
         return f
 
     accumulator = _deserialize_accumulator(
-        SpecialAccumulatorIds.SQL_UDF_PROFIER, {}, ProfileResultsParam
+        SpecialAccumulatorIds.SQL_UDF_PROFIER, None, ProfileResultsParam
+    )
+
+    accumulator_v2 = _deserialize_accumulator(
+        SpecialAccumulatorIds.SQL_UDF_PROFIER_V2, {}, ProfileResultsParamV2
     )
 
     if _is_iter_based(eval_type):
@@ -1061,7 +1045,7 @@ def profiling_func(*args, **kwargs):
 
             while True:
                 try:
-                    with WorkerMemoryProfiler(accumulator, result_id, g.gi_code):
+                    with WorkerMemoryProfiler(accumulator, accumulator_v2, result_id, g.gi_code):
                         item = next(iterator)
                     yield item
                 except StopIteration:
@@ -1070,7 +1054,7 @@ def profiling_func(*args, **kwargs):
     else:
 
         def profiling_func(*args, **kwargs):
-            with WorkerMemoryProfiler(accumulator, result_id, f):
+            with WorkerMemoryProfiler(accumulator, accumulator_v2, result_id, f):
                 ret = f(*args, **kwargs)
             return ret
 
@@ -1102,7 +1086,8 @@ def read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index):
 
     # If chained_func is from pyspark.sql.worker, it is to read/write data source.
     # In this case, we check the data_source_profiler config.
-    if getattr(chained_func, "__module__", "").startswith("pyspark.sql.worker."):
+    module = getattr(chained_func, "__module__", "")
+    if isinstance(module, str) and module.startswith("pyspark.sql.worker."):
         profiler = runner_conf.data_source_profiler
     else:
         profiler = runner_conf.udf_profiler
@@ -1139,8 +1124,8 @@ def read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index):
     elif eval_type == PythonEvalType.SQL_MAP_ARROW_ITER_UDF:
         return func, None, None, None
     elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
-        argspec = inspect.getfullargspec(chained_func)  # signature was lost when wrapping it
-        return args_offsets, wrap_grouped_map_pandas_udf(func, return_type, argspec, runner_conf)
+        num_udf_args = len(inspect.getfullargspec(chained_func).args)
+        return func, args_offsets, return_type, num_udf_args
     elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_ITER_UDF:
         argspec = inspect.getfullargspec(chained_func)  # signature was lost when wrapping it
         return args_offsets, wrap_grouped_map_pandas_iter_udf(
@@ -2393,6 +2378,7 @@ def read_udfs(pickleSer, infile, eval_type, runner_conf, eval_conf):
     ):
         # NOTE: if timezone is set here, that implies respectSessionTimeZone is True
         if eval_type in (
+            PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
             PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF,
             PythonEvalType.SQL_GROUPED_MAP_ARROW_ITER_UDF,
             PythonEvalType.SQL_GROUPED_AGG_ARROW_UDF,
@@ -2413,10 +2399,7 @@ def read_udfs(pickleSer, infile, eval_type, runner_conf, eval_conf):
                 prefer_int_ext_dtype=runner_conf.prefer_int_ext_dtype,
                 int_to_decimal_coercion_enabled=runner_conf.int_to_decimal_coercion_enabled,
             )
-        elif (
-            eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF
-            or eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_ITER_UDF
-        ):
+        elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_ITER_UDF:
             ser = GroupPandasUDFSerializer(
                 timezone=runner_conf.timezone,
                 safecheck=runner_conf.safecheck,
@@ -2943,6 +2926,77 @@ def grouped_func(
         # profiling is not supported for UDF
         return grouped_func, None, ser, ser
 
+    if eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
+        import pyarrow as pa
+        import pandas as pd
+
+        assert num_udfs == 1, "One GROUPED_MAP_PANDAS UDF expected here."
+        grouped_udf, arg_offsets, return_type, num_udf_args = udfs[0]
+        parsed_offsets = extract_key_value_indexes(arg_offsets)
+        assert len(parsed_offsets) == 1, "Expected one pair of offsets for GROUPED_MAP_PANDAS UDF."
+
+        key_offsets = parsed_offsets[0][0]
+        value_offsets = parsed_offsets[0][1]
+        output_schema = StructType([StructField("_0", return_type)])
+
+        def grouped_func(
+            split_index: int,
+            data: Iterator[Iterator[pa.RecordBatch]],
+        ) -> Iterator[pa.RecordBatch]:
+            """Apply groupBy Pandas UDF (non-iterator variant).
+
+            The explicit ``del`` calls below keep peakmem bounded across
+            groups. Without them, generator locals from the previous
+            iteration stay bound on the frame until each statement in
+            the next iteration rebinds its slot, so the input-side
+            DataFrames overlap with the next group's allocations and
+            the working set grows unbounded on wide-column, large-group
+            inputs. ``del result`` runs on resume from yield, before
+            ``data.__next__()`` is asked for the next group.
+            """
+            for group in data:
+                all_batches = list(group)
+                if all_batches:
+                    table = pa.Table.from_batches(all_batches).combine_chunks()
+                else:
+                    table = pa.table({})
+                all_series = ArrowBatchTransformer.to_pandas(
+                    table,
+                    timezone=runner_conf.timezone,
+                    prefer_int_ext_dtype=runner_conf.prefer_int_ext_dtype,
+                )
+                value_df = pd.concat([all_series[o] for o in value_offsets], axis=1)
+
+                if num_udf_args == 1:
+                    result = grouped_udf(value_df)
+                else:
+                    key = tuple(all_series[o].iloc[0] for o in key_offsets)
+                    result = grouped_udf(key, value_df)
+
+                del all_batches, table, all_series, value_df
+
+                verify_pandas_result(
+                    result,
+                    return_type,
+                    runner_conf.assign_cols_by_name,
+                    truncate_return_schema=False,
+                )
+
+                yield PandasToArrowConversion.convert(
+                    [result],
+                    output_schema,
+                    timezone=runner_conf.timezone,
+                    safecheck=runner_conf.safecheck,
+                    arrow_cast=True,
+                    prefers_large_types=runner_conf.use_large_var_types,
+                    assign_cols_by_name=runner_conf.assign_cols_by_name,
+                    int_to_decimal_coercion_enabled=runner_conf.int_to_decimal_coercion_enabled,
+                )
+                del result
+
+        # profiling is not supported for UDF
+        return grouped_func, None, ser, ser
+
     if (
         eval_type == PythonEvalType.SQL_ARROW_BATCHED_UDF
         and not runner_conf.use_legacy_pandas_udf_conversion
@@ -3187,39 +3241,7 @@ def map_batch(batch):
         # profiling is not supported for UDF
         return func, None, ser, ser
 
-    if eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
-        import pyarrow as pa
-
-        # We assume there is only one UDF here because grouped map doesn't
-        # support combining multiple UDFs.
-        assert num_udfs == 1
-
-        # See FlatMapGroupsInPandasExec for how arg_offsets are used to
-        # distinguish between grouping attributes and data attributes
-        arg_offsets, f = udfs[0]
-        parsed_offsets = extract_key_value_indexes(arg_offsets)
-
-        key_offsets = parsed_offsets[0][0]
-        value_offsets = parsed_offsets[0][1]
-
-        def mapper(batch_iter):
-            # Collect all Arrow batches and merge at Arrow level
-            all_batches = list(batch_iter)
-            if all_batches:
-                table = pa.Table.from_batches(all_batches).combine_chunks()
-            else:
-                table = pa.table({})
-            # Convert to pandas once for the entire group
-            all_series = ArrowBatchTransformer.to_pandas(
-                table,
-                timezone=ser._timezone,
-                prefer_int_ext_dtype=runner_conf.prefer_int_ext_dtype,
-            )
-            key_series = [all_series[o] for o in key_offsets]
-            value_series = [all_series[o] for o in value_offsets]
-            yield from f(key_series, value_series)
-
-    elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_ITER_UDF:
+    if eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_ITER_UDF:
         import pyarrow as pa
 
         # We assume there is only one UDF here because grouped map doesn't
diff --git a/repl/pom.xml b/repl/pom.xml
index c3a4fd50a3970..9558939a61a6d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 6c170c8d659c3..459c0818e52d1 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
index 6f1130853c5a1..0e79cd0d5606c 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
@@ -262,6 +262,33 @@ private[spark] object Config extends Logging {
       .checkValue(v => 0 < v && v <= 1, "The factor should be in (0, 1]")
       .createWithDefault(0.1)
 
+  val PVC_RESIZE_INTERVAL =
+    ConfigBuilder("spark.kubernetes.executor.pvc.resizeInterval")
+      .doc("Interval between executor PVC resize operations, in minutes. " +
+        "Defaults to 5 minutes. Set to 0 to disable. " +
+        "Must be 0 or a positive multiple of 5 minutes.")
+      .version("4.2.0")
+      .timeConf(TimeUnit.MINUTES)
+      .checkValue(v => v >= 0 && v % 5 == 0,
+        "Interval must be 0 or a positive multiple of 5 minutes")
+      .createWithDefault(5)
+
+  val PVC_RESIZE_THRESHOLD =
+    ConfigBuilder("spark.kubernetes.executor.pvc.resizeThreshold")
+      .doc("The PVC usage ratio (used / capacity) above which the driver triggers a resize.")
+      .version("4.2.0")
+      .doubleConf
+      .checkValue(v => 0 < v && v < 1, "The threshold should be in (0, 1)")
+      .createWithDefault(0.5)
+
+  val PVC_RESIZE_FACTOR =
+    ConfigBuilder("spark.kubernetes.executor.pvc.resizeFactor")
+      .doc("The factor to grow PVC storage by, relative to the current request.")
+      .version("4.2.0")
+      .doubleConf
+      .checkValue(v => 0 < v && v <= 1, "The factor should be in (0, 1]")
+      .createWithDefault(1.0)
+
   val KUBERNETES_AUTH_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver"
   val KUBERNETES_AUTH_EXECUTOR_CONF_PREFIX = "spark.kubernetes.authenticate.executor"
   val KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX = "spark.kubernetes.authenticate.driver.mounted"
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala
index 22a21c2ed4aaf..9070d6ce35ff6 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala
@@ -49,6 +49,8 @@ private[spark] abstract class KubernetesConf(val sparkConf: SparkConf) {
 
   def appName: String = get("spark.app.name", "spark")
 
+  def sparkVersion: String = SPARK_VERSION
+
   def namespace: String = get(KUBERNETES_NAMESPACE)
 
   def imagePullPolicy: String = get(CONTAINER_IMAGE_PULL_POLICY)
@@ -122,7 +124,7 @@ class KubernetesDriverConf(
 
   override def labels: Map[String, String] = {
     val presetLabels = Map(
-      SPARK_VERSION_LABEL -> SPARK_VERSION,
+      SPARK_VERSION_LABEL -> sparkVersion,
       SPARK_APP_ID_LABEL -> appId,
       SPARK_APP_NAME_LABEL -> KubernetesConf.getAppNameLabel(appName),
       SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE)
@@ -199,7 +201,7 @@ private[spark] class KubernetesExecutorConf(
 
   override def labels: Map[String, String] = {
     val presetLabels = Map(
-      SPARK_VERSION_LABEL -> SPARK_VERSION,
+      SPARK_VERSION_LABEL -> sparkVersion,
       SPARK_EXECUTOR_ID_LABEL -> executorId,
       SPARK_APP_ID_LABEL -> appId,
       SPARK_APP_NAME_LABEL -> KubernetesConf.getAppNameLabel(appName),
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPVCResizePlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPVCResizePlugin.scala
new file mode 100644
index 0000000000000..d515e6b53c28d
--- /dev/null
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPVCResizePlugin.scala
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.scheduler.cluster.k8s
+
+import java.util.{Map => JMap}
+import java.util.concurrent.{ConcurrentHashMap, ScheduledExecutorService, TimeUnit}
+
+import scala.jdk.CollectionConverters._
+
+import io.fabric8.kubernetes.api.model.{PersistentVolumeClaimBuilder, Pod, Quantity}
+import io.fabric8.kubernetes.client.KubernetesClient
+import io.fabric8.kubernetes.client.dsl.base.PatchContext
+import io.fabric8.kubernetes.client.dsl.base.PatchType
+
+import org.apache.spark.{SparkContext, SparkEnv}
+import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin}
+import org.apache.spark.deploy.k8s.Config._
+import org.apache.spark.deploy.k8s.Constants._
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.LogKeys.{CONFIG, CONFIG2, CURRENT_DISK_SIZE, ORIGINAL_DISK_SIZE, PVC_METADATA_NAME}
+import org.apache.spark.util.ThreadUtils
+
+/**
+ * Spark plugin to monitor executor PVC disk usage and grow the PVC storage request
+ * when the usage exceeds a configurable threshold.
+ *
+ * Executors measure their own local-directory usage (via DiskBlockManager) and report
+ * the maximum filesystem usage ratio to the driver through the plugin RPC channel.
+ * When the ratio exceeds the threshold, the driver patches every `spark-local-dir-*`
+ * PVC mounted by the reporting executor's pod to grow its
+ * `spec.resources.requests.storage`. The underlying StorageClass must have
+ * `allowVolumeExpansion: true`.
+ */
+class ExecutorPVCResizePlugin extends SparkPlugin {
+  override def driverPlugin(): DriverPlugin = new ExecutorPVCResizeDriverPlugin()
+
+  override def executorPlugin(): ExecutorPlugin = new ExecutorPVCResizeExecutorPlugin()
+}
+
+/**
+ * Message sent from each executor to the driver with the maximum filesystem usage
+ * ratio (used / total) across the executor's SPARK_LOCAL_DIRS. The driver applies
+ * this ratio to every PVC mounted by the reporting executor's pod.
+ */
+private[k8s] case class PVCDiskUsageReport(
+    executorId: String,
+    ratio: Double)
+
+class ExecutorPVCResizeDriverPlugin extends DriverPlugin with Logging {
+  private var sparkContext: SparkContext = _
+  private var namespace: String = _
+  private var threshold: Double = _
+  private var factor: Double = _
+
+  private val latestReports = new ConcurrentHashMap[String, PVCDiskUsageReport]()
+  private val failedPvcs = ConcurrentHashMap.newKeySet[String]()
+  private val requestedSizes = new ConcurrentHashMap[String, Long]()
+
+  private val periodicService: ScheduledExecutorService =
+    ThreadUtils.newDaemonSingleThreadScheduledExecutor("pvc-resize-plugin")
+
+  override def init(sc: SparkContext, ctx: PluginContext): JMap[String, String] = {
+    val allocator = sc.conf.get(KUBERNETES_ALLOCATION_PODS_ALLOCATOR)
+    if (allocator != "direct") {
+      logWarning(log"ExecutorPVCResizePlugin requires the 'direct' pods allocator; " +
+        log"${MDC(CONFIG, KUBERNETES_ALLOCATION_PODS_ALLOCATOR.key)} is " +
+        log"${MDC(CONFIG2, allocator)}. Plugin will not start.")
+      return Map.empty[String, String].asJava
+    }
+    val interval = sc.conf.get(PVC_RESIZE_INTERVAL)
+    if (interval <= 0) {
+      logInfo("PVCResizePlugin disabled (interval <= 0).")
+      return Map.empty[String, String].asJava
+    }
+    threshold = sc.conf.get(PVC_RESIZE_THRESHOLD)
+    factor = sc.conf.get(PVC_RESIZE_FACTOR)
+    namespace = sc.conf.get(KUBERNETES_NAMESPACE)
+    sparkContext = sc
+
+    periodicService.scheduleAtFixedRate(() => if (!sparkContext.isStopped) {
+      try {
+        checkAndResizePVCs()
+      } catch {
+        case e: Throwable => logError("Error in PVC resize thread", e)
+      }
+    }, interval, interval, TimeUnit.MINUTES)
+    logInfo("ExecutorPVCResizeDriverPlugin is scheduled")
+
+    // Propagate the interval to executors so they report at the same cadence.
+    Map(PVC_RESIZE_INTERVAL.key -> interval.toString).asJava
+  }
+
+  override def receive(message: Any): AnyRef = message match {
+    case r: PVCDiskUsageReport =>
+      latestReports.put(r.executorId, r)
+      null
+    case _ =>
+      null
+  }
+
+  override def shutdown(): Unit = {
+    periodicService.shutdown()
+  }
+
+  private[k8s] def checkAndResizePVCs(): Unit = {
+    logInfo(s"Latest PVC usage reports: $latestReports")
+    val appId = sparkContext.applicationId
+
+    sparkContext.schedulerBackend match {
+      case b: KubernetesClusterSchedulerBackend =>
+        val client = b.kubernetesClient
+        val pods = client.pods()
+          .inNamespace(namespace)
+          .withLabel(SPARK_APP_ID_LABEL, appId)
+          .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)
+          .list()
+          .getItems.asScala
+
+        val podByExecId = pods.flatMap { p =>
+          Option(p.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL)).map(_ -> p)
+        }.toMap
+
+        latestReports.values().asScala.foreach { report =>
+          podByExecId.get(report.executorId).foreach { pod =>
+            pvcsOf(pod).foreach { pvcName =>
+              if (!failedPvcs.contains(pvcName)) {
+                tryResize(client, pvcName, report.ratio, report.executorId)
+              }
+            }
+          }
+        }
+      case _ =>
+        logWarning("Skipping PVC resize: schedulerBackend is not " +
+          "KubernetesClusterSchedulerBackend.")
+    }
+  }
+
+  private[k8s] def pvcsOf(pod: Pod): Set[String] = {
+    val volNameToPvc = pod.getSpec.getVolumes.asScala
+      .filter(_.getPersistentVolumeClaim != null)
+      .filter(_.getName.startsWith("spark-local-dir-"))
+      .map(v => v.getName -> v.getPersistentVolumeClaim.getClaimName)
+      .toMap
+    pod.getSpec.getContainers.asScala
+      .find(_.getName == DEFAULT_EXECUTOR_CONTAINER_NAME)
+      .orElse(pod.getSpec.getContainers.asScala.headOption)
+      .toSeq
+      .flatMap(_.getVolumeMounts.asScala)
+      .flatMap(m => volNameToPvc.get(m.getName))
+      .toSet
+  }
+
+  private def tryResize(
+      client: KubernetesClient, pvcName: String, ratio: Double, execId: String): Unit = {
+    logInfo(s"Try to resize executor $execId PVC $pvcName with ratio $ratio " +
+      s"(threshold $threshold).")
+    if (ratio <= threshold) return
+    try {
+      val pvc = client.persistentVolumeClaims()
+        .inNamespace(namespace)
+        .withName(pvcName)
+        .get()
+      if (pvc == null) return
+      val current = Quantity.getAmountInBytes(
+        pvc.getSpec.getResources.getRequests.get("storage")).longValue()
+      val capacity = Option(pvc.getStatus)
+        .flatMap(s => Option(s.getCapacity))
+        .flatMap(c => Option(c.get("storage")))
+        .map(q => Quantity.getAmountInBytes(q).longValue())
+        .getOrElse(current)
+      if (current > capacity) {
+        logInfo(s"PVC $pvcName resize is in progress or failed " +
+          s"(spec=$current, status=$capacity); skip.")
+        return
+      }
+      val newSize = (current * (1.0 + factor)).toLong
+      if (requestedSizes.get(pvcName) == newSize) return
+      logInfo(log"Increase PVC ${MDC(PVC_METADATA_NAME, pvcName)} storage " +
+        log"from ${MDC(ORIGINAL_DISK_SIZE, current)} to " +
+        log"${MDC(CURRENT_DISK_SIZE, newSize)} as usage ratio exceeded threshold.")
+      val patch = new PersistentVolumeClaimBuilder()
+        .withNewSpec()
+          .withNewResources()
+            .addToRequests("storage", new Quantity(newSize.toString))
+          .endResources()
+        .endSpec()
+        .build()
+      client.persistentVolumeClaims()
+        .inNamespace(namespace)
+        .withName(pvcName)
+        .patch(PatchContext.of(PatchType.STRATEGIC_MERGE), patch)
+      requestedSizes.put(pvcName, newSize)
+    } catch {
+      case e: Throwable =>
+        failedPvcs.add(pvcName)
+        logInfo(log"Failed to expand PVC ${MDC(PVC_METADATA_NAME, pvcName)}; " +
+          log"will skip subsequent attempts.", e)
+    }
+  }
+}
+
+class ExecutorPVCResizeExecutorPlugin extends ExecutorPlugin with Logging {
+  private var pluginContext: PluginContext = _
+  private var periodicService: ScheduledExecutorService = _
+
+  override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = {
+    val intervalStr = extraConf.get(PVC_RESIZE_INTERVAL.key)
+    if (intervalStr == null) {
+      // Driver disabled the plugin; do nothing.
+      return
+    }
+    val interval = intervalStr.toLong
+    if (interval <= 0) return
+
+    pluginContext = ctx
+    periodicService =
+      ThreadUtils.newDaemonSingleThreadScheduledExecutor("pvc-resize-reporter")
+    periodicService.scheduleAtFixedRate(() => {
+      try {
+        report()
+      } catch {
+        case e: Throwable => logDebug("Failed to report PVC usage", e)
+      }
+    }, interval, interval, TimeUnit.MINUTES)
+  }
+
+  override def shutdown(): Unit = {
+    if (periodicService != null) periodicService.shutdown()
+  }
+
+  private def report(): Unit = {
+    val env = SparkEnv.get
+    if (env == null) return
+    val dirs = env.blockManager.diskBlockManager.localDirs
+    if (dirs == null || dirs.isEmpty) return
+    val maxRatio = dirs.iterator.flatMap { d =>
+      try {
+        // Skip if total is 0 (e.g. dir unmounted, statvfs failed) to avoid divide-by-zero.
+        val total = d.getTotalSpace
+        if (total > 0) Some((total - d.getUsableSpace).toDouble / total) else None
+      } catch { case _: Throwable => None }
+    }.maxOption
+    maxRatio.foreach { ratio =>
+      logInfo(s"Reporting max PVC disk usage ratio for executor ${env.executorId}: $ratio")
+      pluginContext.send(PVCDiskUsageReport(env.executorId, ratio))
+    }
+  }
+}
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSource.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSource.scala
index 4ed34ec3e4c00..3d2822e5eb518 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSource.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSource.scala
@@ -47,6 +47,7 @@ class ExecutorPodsPollingSnapshotSource(
 
   private val pollingInterval = conf.get(KUBERNETES_EXECUTOR_API_POLLING_INTERVAL)
   private val pollingEnabled = conf.get(KUBERNETES_EXECUTOR_ENABLE_API_POLLING)
+  private val namespace = conf.get(KUBERNETES_NAMESPACE)
 
   private var pollingFuture: Future[_] = _
 
@@ -76,6 +77,7 @@ class ExecutorPodsPollingSnapshotSource(
       logDebug(s"Resynchronizing full executor pod state from Kubernetes.")
       val pods = kubernetesClient
         .pods()
+        .inNamespace(namespace)
         .withLabel(SPARK_APP_ID_LABEL, applicationId)
         .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)
         .withoutLabel(SPARK_EXECUTOR_INACTIVE_LABEL, "true")
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePlugin.scala
index 77fd37dd538b5..5b526db122e3f 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePlugin.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePlugin.scala
@@ -30,9 +30,8 @@ import org.apache.spark.SparkContext
 import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin}
 import org.apache.spark.deploy.k8s.Config._
 import org.apache.spark.deploy.k8s.Constants._
-import org.apache.spark.deploy.k8s.SparkKubernetesClientFactory
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.LogKeys.{EXECUTOR_ID, MEMORY_SIZE}
+import org.apache.spark.internal.LogKeys.{CLASS_NAME, CONFIG, CONFIG2, EXECUTOR_ID, MEMORY_SIZE}
 import org.apache.spark.util.{ThreadUtils, Utils}
 
 /**
@@ -47,52 +46,52 @@ class ExecutorResizePlugin extends SparkPlugin {
 
 class ExecutorResizeDriverPlugin extends DriverPlugin with Logging {
   private var sparkContext: SparkContext = _
-  private var kubernetesClient: KubernetesClient = _
 
   private val periodicService: ScheduledExecutorService =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("executor-resize-plugin")
 
   override def init(sc: SparkContext, ctx: PluginContext): JMap[String, String] = {
+    val allocator = sc.conf.get(KUBERNETES_ALLOCATION_PODS_ALLOCATOR)
+    if (allocator != "direct") {
+      logWarning(log"ExecutorResizePlugin requires the 'direct' pods allocator; " +
+        log"${MDC(CONFIG, KUBERNETES_ALLOCATION_PODS_ALLOCATOR.key)} is " +
+        log"${MDC(CONFIG2, allocator)}. Plugin will not start.")
+      return Map.empty[String, String].asJava
+    }
+
     val interval = Utils.timeStringAsSeconds(
       sc.conf.get(EXECUTOR_RESIZE_INTERVAL.key, "1m"))
     val threshold = sc.conf.getDouble(EXECUTOR_RESIZE_THRESHOLD.key, 0.9)
     val factor = sc.conf.getDouble(EXECUTOR_RESIZE_FACTOR.key, 0.1)
     val namespace = sc.conf.get(KUBERNETES_NAMESPACE)
 
+    // Scheduler is not created yet at init time; resolve it lazily in the periodic task.
     sparkContext = sc
 
-    try {
-      kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient(
-        sc.conf.get(KUBERNETES_DRIVER_MASTER_URL),
-        Option(namespace),
-        KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX,
-        SparkKubernetesClientFactory.ClientType.Driver,
-        sc.conf,
-        None)
-
-      periodicService.scheduleAtFixedRate(() => {
-        try {
-          checkAndIncreaseMemory(namespace, threshold, factor)
-        } catch {
-          case e: Throwable => logError("Error in memory check thread", e)
+    periodicService.scheduleAtFixedRate(() => if (!sparkContext.isStopped) {
+      try {
+        sparkContext.schedulerBackend match {
+          case backend: KubernetesClusterSchedulerBackend =>
+            checkAndIncreaseMemory(namespace, threshold, factor, backend.kubernetesClient)
+          case _ =>
+            logWarning(log"This plugin expects " +
+              log"${MDC(CLASS_NAME, classOf[KubernetesClusterSchedulerBackend].getSimpleName)}.")
         }
-      }, interval, interval, TimeUnit.SECONDS)
-    } catch {
-      case e: Exception =>
-        logError("Failed to initialize", e)
-    }
+      } catch {
+        case e: Throwable => logError("Error in memory check thread", e)
+      }
+    }, interval, interval, TimeUnit.SECONDS)
 
     Map.empty[String, String].asJava
   }
 
-  override def shutdown(): Unit = {
-    periodicService.shutdown()
-    if (kubernetesClient != null) {
-      kubernetesClient.close()
-    }
-  }
+  override def shutdown(): Unit = periodicService.shutdown()
 
-  private def checkAndIncreaseMemory(namespace: String, threshold: Double, factor: Double): Unit = {
+  private def checkAndIncreaseMemory(
+      namespace: String,
+      threshold: Double,
+      factor: Double,
+      kubernetesClient: KubernetesClient): Unit = {
     val appId = sparkContext.applicationId
 
     // Get all running executor pods for this application
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala
index 50c672781b96c..0784b82a85de2 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala
@@ -47,7 +47,7 @@ import org.apache.spark.util.ArrayImplicits._
 private[spark] class KubernetesClusterSchedulerBackend(
     scheduler: TaskSchedulerImpl,
     sc: SparkContext,
-    kubernetesClient: KubernetesClient,
+    private[k8s] val kubernetesClient: KubernetesClient,
     executorService: ScheduledExecutorService,
     snapshotsStore: ExecutorPodsSnapshotsStore,
     podAllocator: AbstractPodsAllocator,
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
index 37778ff1bf8f1..c1b3e8d6b252b 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala
@@ -298,6 +298,31 @@ class KubernetesConfSuite extends SparkFunSuite {
     }
   }
 
+  test("SPARK-56736: sparkVersion returns the runtime Spark version for driver and executor") {
+    val sparkConf = new SparkConf(false)
+    val driverConf = KubernetesTestConf.createDriverConf(sparkConf)
+    val execConf = KubernetesTestConf.createExecutorConf(sparkConf)
+    assert(driverConf.sparkVersion === SPARK_VERSION)
+    assert(execConf.sparkVersion === SPARK_VERSION)
+    assert(driverConf.labels(SPARK_VERSION_LABEL) === SPARK_VERSION)
+    assert(execConf.labels(SPARK_VERSION_LABEL) === SPARK_VERSION)
+  }
+
+  test("SPARK-56736: KubernetesDriverConf subclass can override sparkVersion") {
+    val customVersion = "9.9.9-custom"
+    val customConf = new KubernetesDriverConf(
+      new SparkConf(false),
+      KubernetesTestConf.APP_ID,
+      JavaMainAppResource(None),
+      KubernetesTestConf.MAIN_CLASS,
+      APP_ARGS,
+      None) {
+      override def sparkVersion: String = customVersion
+    }
+    assert(customConf.sparkVersion === customVersion)
+    assert(customConf.labels(SPARK_VERSION_LABEL) === customVersion)
+  }
+
   test("SPARK-52902: K8s image configs support {{SPARK_VERSION}} placeholder") {
     val sparkConf = new SparkConf(false)
     sparkConf.set(CONTAINER_IMAGE, "apache/spark:{{SPARK_VERSION}}")
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPVCResizePluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPVCResizePluginSuite.scala
new file mode 100644
index 0000000000000..f587e2f47336a
--- /dev/null
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPVCResizePluginSuite.scala
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.scheduler.cluster.k8s
+
+import java.util.Collections
+
+import io.fabric8.kubernetes.api.model._
+import io.fabric8.kubernetes.client.KubernetesClient
+import io.fabric8.kubernetes.client.dsl.Resource
+import org.mockito.ArgumentCaptor
+import org.mockito.ArgumentMatchers.any
+import org.mockito.Mockito.{mock, never, times, verify, when}
+import org.scalatest.BeforeAndAfter
+
+import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
+import org.apache.spark.api.plugin.PluginContext
+import org.apache.spark.deploy.k8s.Config.{KUBERNETES_ALLOCATION_PODS_ALLOCATOR, PVC_RESIZE_INTERVAL}
+import org.apache.spark.deploy.k8s.Constants._
+import org.apache.spark.deploy.k8s.Fabric8Aliases._
+
+class ExecutorPVCResizePluginSuite
+    extends SparkFunSuite with BeforeAndAfter {
+
+  private val namespace = "test-namespace"
+  private val appId = "spark-test-app"
+
+  private var kubernetesClient: KubernetesClient = _
+  private var sparkContext: SparkContext = _
+  private var schedulerBackend: KubernetesClusterSchedulerBackend = _
+  private var podOperations: PODS = _
+  private var podsWithNamespace: PODS_WITH_NAMESPACE = _
+  private var labeledPods: LABELED_PODS = _
+  private var podList: PodList = _
+  private var pvcOperations: PERSISTENT_VOLUME_CLAIMS = _
+  private var pvcsWithNamespace: PVC_WITH_NAMESPACE = _
+
+  before {
+    kubernetesClient = mock(classOf[KubernetesClient])
+    sparkContext = mock(classOf[SparkContext])
+    schedulerBackend = mock(classOf[KubernetesClusterSchedulerBackend])
+    podOperations = mock(classOf[PODS])
+    podsWithNamespace = mock(classOf[PODS_WITH_NAMESPACE])
+    labeledPods = mock(classOf[LABELED_PODS])
+    podList = mock(classOf[PodList])
+    pvcOperations = mock(classOf[PERSISTENT_VOLUME_CLAIMS])
+    pvcsWithNamespace = mock(classOf[PVC_WITH_NAMESPACE])
+
+    when(sparkContext.applicationId).thenReturn(appId)
+    when(sparkContext.schedulerBackend).thenReturn(schedulerBackend)
+    when(schedulerBackend.kubernetesClient).thenReturn(kubernetesClient)
+    when(kubernetesClient.pods()).thenReturn(podOperations)
+    when(podOperations.inNamespace(namespace)).thenReturn(podsWithNamespace)
+    when(podsWithNamespace.withLabel(SPARK_APP_ID_LABEL, appId)).thenReturn(labeledPods)
+    when(labeledPods.withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)).thenReturn(labeledPods)
+    when(labeledPods.list()).thenReturn(podList)
+    when(kubernetesClient.persistentVolumeClaims()).thenReturn(pvcOperations)
+    when(pvcOperations.inNamespace(namespace)).thenReturn(pvcsWithNamespace)
+  }
+
+  private def createPlugin(
+      threshold: Double = 0.9,
+      factor: Double = 0.1): ExecutorPVCResizeDriverPlugin = {
+    val plugin = new ExecutorPVCResizeDriverPlugin()
+    val cls = plugin.getClass
+    setField(cls, plugin, "sparkContext", sparkContext)
+    setField(cls, plugin, "namespace", namespace)
+    setField(cls, plugin, "threshold", threshold)
+    setField(cls, plugin, "factor", factor)
+    plugin
+  }
+
+  private def setField(cls: Class[_], obj: Any, name: String, value: Any): Unit = {
+    val f = cls.getDeclaredField(name)
+    f.setAccessible(true)
+    f.set(obj, value)
+  }
+
+  private def createPodWithPVC(
+      executorId: Long,
+      claimName: String,
+      mountPath: String,
+      containerName: String = DEFAULT_EXECUTOR_CONTAINER_NAME,
+      volumeName: String = "spark-local-dir-1"): Pod = {
+    new PodBuilder()
+      .withNewMetadata()
+        .withName(s"spark-executor-$executorId")
+        .addToLabels(SPARK_APP_ID_LABEL, appId)
+        .addToLabels(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)
+        .addToLabels(SPARK_EXECUTOR_ID_LABEL, executorId.toString)
+      .endMetadata()
+      .withNewSpec()
+        .addNewVolume()
+          .withName(volumeName)
+          .withNewPersistentVolumeClaim()
+            .withClaimName(claimName)
+          .endPersistentVolumeClaim()
+        .endVolume()
+        .addNewContainer()
+          .withName(containerName)
+          .addNewVolumeMount()
+            .withName(volumeName)
+            .withMountPath(mountPath)
+          .endVolumeMount()
+        .endContainer()
+      .endSpec()
+      .build()
+  }
+
+  private def createPVC(
+      name: String,
+      storageBytes: String,
+      statusCapacityBytes: String = null): PersistentVolumeClaim = {
+    val builder = new PersistentVolumeClaimBuilder()
+      .withNewMetadata().withName(name).endMetadata()
+      .withNewSpec()
+        .withNewResources()
+          .addToRequests("storage", new Quantity(storageBytes))
+        .endResources()
+      .endSpec()
+    val cap = Option(statusCapacityBytes).getOrElse(storageBytes)
+    builder
+      .withNewStatus()
+        .addToCapacity("storage", new Quantity(cap))
+      .endStatus()
+      .build()
+  }
+
+  private def mockPvcResource(
+      pvcName: String,
+      storageBytes: String,
+      statusCapacityBytes: String = null): Resource[PersistentVolumeClaim] = {
+    val pvc = createPVC(pvcName, storageBytes, statusCapacityBytes)
+    val resource = mock(classOf[Resource[PersistentVolumeClaim]])
+    when(pvcsWithNamespace.withName(pvcName)).thenReturn(resource)
+    when(resource.get()).thenReturn(pvc)
+    resource
+  }
+
+  test("Empty pod list does not trigger any patch") {
+    val plugin = createPlugin()
+    when(podList.getItems).thenReturn(Collections.emptyList())
+    plugin.receive(PVCDiskUsageReport("1", 0.1))
+
+    plugin.checkAndResizePVCs()
+
+    verify(pvcsWithNamespace, never()).withName(org.mockito.ArgumentMatchers.anyString())
+  }
+
+  test("Usage below threshold does not trigger patch") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(1, "pvc-1", "/data")
+    when(podList.getItems).thenReturn(Collections.singletonList(pod))
+    val resource = mockPvcResource("pvc-1", "1000000000") // 1GB
+    plugin.receive(PVCDiskUsageReport("1", 0.5)) // 50%
+
+    plugin.checkAndResizePVCs()
+
+    verify(resource, never()).patch(any(), any(classOf[PersistentVolumeClaim]))
+  }
+
+  test("Usage above threshold triggers patch with grown size") {
+    val plugin = createPlugin(threshold = 0.9, factor = 0.1)
+    val pod = createPodWithPVC(1, "pvc-1", "/data")
+    when(podList.getItems).thenReturn(Collections.singletonList(pod))
+    val resource = mockPvcResource("pvc-1", "1000000000") // 1GB
+    plugin.receive(PVCDiskUsageReport("1", 0.95)) // 95%
+
+    plugin.checkAndResizePVCs()
+
+    val captor = ArgumentCaptor.forClass(classOf[PersistentVolumeClaim])
+    verify(resource, times(1)).patch(any(), captor.capture())
+    val patched = Quantity.getAmountInBytes(
+      captor.getValue.getSpec.getResources.getRequests.get("storage")).longValue()
+    // current 1GB * (1 + factor 0.1) = 1.1GB
+    assert(patched === 1100000000L)
+  }
+
+  test("PVC with pending or failed resize is skipped") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(1, "pvc-1", "/data")
+    when(podList.getItems).thenReturn(Collections.singletonList(pod))
+    // spec.requests.storage > status.capacity.storage simulates VolumeResizeFailed
+    // or in-progress resize.
+    val resource = mockPvcResource("pvc-1", "2000000000",
+      statusCapacityBytes = "1000000000")
+    plugin.receive(PVCDiskUsageReport("1", 0.95))
+
+    plugin.checkAndResizePVCs()
+
+    verify(resource, never()).patch(any(), any(classOf[PersistentVolumeClaim]))
+  }
+
+  test("Repeated reports for the same target size do not patch twice") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(1, "pvc-1", "/data")
+    when(podList.getItems).thenReturn(Collections.singletonList(pod))
+    val resource = mockPvcResource("pvc-1", "1000000000")
+    plugin.receive(PVCDiskUsageReport("1", 0.95))
+
+    plugin.checkAndResizePVCs()
+    plugin.checkAndResizePVCs()
+
+    verify(resource, times(1)).patch(any(), any(classOf[PersistentVolumeClaim]))
+  }
+
+  test("Patch failure adds PVC to blacklist") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(1, "pvc-1", "/data")
+    when(podList.getItems).thenReturn(Collections.singletonList(pod))
+    val resource = mockPvcResource("pvc-1", "1000000000")
+    when(resource.patch(any(), any(classOf[PersistentVolumeClaim])))
+      .thenThrow(new RuntimeException("expansion not allowed"))
+    plugin.receive(PVCDiskUsageReport("1", 0.95))
+
+    plugin.checkAndResizePVCs()
+    plugin.checkAndResizePVCs()
+
+    // Only one patch attempt despite two check rounds.
+    verify(resource, times(1)).patch(any(), any(classOf[PersistentVolumeClaim]))
+  }
+
+  test("Pod with no PVC volume triggers no patch") {
+    val plugin = createPlugin()
+    val pod = new PodBuilder()
+      .withNewMetadata()
+        .withName("spark-executor-1")
+        .addToLabels(SPARK_APP_ID_LABEL, appId)
+        .addToLabels(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)
+        .addToLabels(SPARK_EXECUTOR_ID_LABEL, "1")
+      .endMetadata()
+      .withNewSpec()
+        .addNewContainer().withName(DEFAULT_EXECUTOR_CONTAINER_NAME).endContainer()
+      .endSpec()
+      .build()
+    when(podList.getItems).thenReturn(Collections.singletonList(pod))
+    plugin.receive(PVCDiskUsageReport("1", 0.95))
+
+    plugin.checkAndResizePVCs()
+
+    verify(pvcsWithNamespace, never()).withName(org.mockito.ArgumentMatchers.anyString())
+  }
+
+  test("pvcsOf returns claim names mounted by the executor container") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(7, "pvc-7", "/spark-local")
+    assert(plugin.pvcsOf(pod) === Set("pvc-7"))
+  }
+
+  test("pvcsOf falls back to first container when default name absent") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(1, "pvc-1", "/data", containerName = "custom")
+    assert(plugin.pvcsOf(pod) === Set("pvc-1"))
+  }
+
+  test("pvcsOf filters out non spark-local-dir-* PVC volumes") {
+    val plugin = createPlugin()
+    val pod = createPodWithPVC(1, "pvc-1", "/data", volumeName = "checkpointpvc")
+    assert(plugin.pvcsOf(pod) === Set.empty)
+  }
+
+  test("receive ignores non-report messages") {
+    val plugin = createPlugin()
+    assert(plugin.receive("unrelated") == null)
+    assert(plugin.receive(42) == null)
+  }
+
+  test("SPARK-56699: PVC_RESIZE_INTERVAL must be 0 or a positive multiple of 5 minutes") {
+    val conf = new SparkConf(false)
+    assert(conf.get(PVC_RESIZE_INTERVAL) === 5)
+    Seq("0", "5", "10", "15", "15min").foreach { v =>
+      conf.set(PVC_RESIZE_INTERVAL.key, v)
+      assert(conf.get(PVC_RESIZE_INTERVAL) >= 0)
+    }
+    Seq("1", "7", "-5").foreach { v =>
+      conf.set(PVC_RESIZE_INTERVAL.key, v)
+      intercept[IllegalArgumentException](conf.get(PVC_RESIZE_INTERVAL))
+    }
+  }
+
+  Seq("statefulset", "deployment").foreach { allocator =>
+    test(s"init returns early when pods allocator is '$allocator'") {
+      val plugin = new ExecutorPVCResizeDriverPlugin()
+      val sparkConf = new SparkConf().set(KUBERNETES_ALLOCATION_PODS_ALLOCATOR, allocator)
+      val sc = mock(classOf[SparkContext])
+      when(sc.conf).thenReturn(sparkConf)
+      val pluginCtx = mock(classOf[PluginContext])
+
+      val result = plugin.init(sc, pluginCtx)
+
+      assert(result.isEmpty)
+    }
+  }
+}
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala
index e0016a2ae0503..71c187a9caf83 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala
@@ -43,6 +43,9 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAn
   @Mock
   private var podOperations: PODS = _
 
+  @Mock
+  private var namespacedPodOperations: PODS_WITH_NAMESPACE = _
+
   @Mock
   private var appIdLabeledPods: LABELED_PODS = _
 
@@ -62,7 +65,9 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAn
     MockitoAnnotations.openMocks(this).close()
     pollingExecutor = new DeterministicScheduler()
     when(kubernetesClient.pods()).thenReturn(podOperations)
-    when(podOperations.withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID))
+    when(podOperations.inNamespace(defaultConf.get(KUBERNETES_NAMESPACE)))
+      .thenReturn(namespacedPodOperations)
+    when(namespacedPodOperations.withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID))
       .thenReturn(appIdLabeledPods)
     when(appIdLabeledPods.withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE))
       .thenReturn(executorRoleLabeledPods)
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePluginSuite.scala
index 649ccd8a945df..9a0439b91f456 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePluginSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorResizePluginSuite.scala
@@ -29,7 +29,9 @@ import org.mockito.Mockito.{mock, never, times, verify, when}
 import org.scalatest.BeforeAndAfter
 import org.scalatest.PrivateMethodTester
 
-import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
+import org.apache.spark.api.plugin.PluginContext
+import org.apache.spark.deploy.k8s.Config.KUBERNETES_ALLOCATION_PODS_ALLOCATOR
 import org.apache.spark.deploy.k8s.Constants._
 import org.apache.spark.deploy.k8s.Fabric8Aliases._
 
@@ -41,6 +43,7 @@ class ExecutorResizePluginSuite
 
   private var kubernetesClient: KubernetesClient = _
   private var sparkContext: SparkContext = _
+  private var schedulerBackend: KubernetesClusterSchedulerBackend = _
   private var podOperations: PODS = _
   private var podsWithNamespace: PODS_WITH_NAMESPACE = _
   private var labeledPods: LABELED_PODS = _
@@ -54,6 +57,7 @@ class ExecutorResizePluginSuite
   before {
     kubernetesClient = mock(classOf[KubernetesClient])
     sparkContext = mock(classOf[SparkContext])
+    schedulerBackend = mock(classOf[KubernetesClusterSchedulerBackend])
     podOperations = mock(classOf[PODS])
     podsWithNamespace = mock(classOf[PODS_WITH_NAMESPACE])
     labeledPods = mock(classOf[LABELED_PODS])
@@ -62,6 +66,8 @@ class ExecutorResizePluginSuite
     podMetricOperations = mock(classOf[PodMetricOperation])
 
     when(sparkContext.applicationId).thenReturn(appId)
+    when(sparkContext.schedulerBackend).thenReturn(schedulerBackend)
+    when(schedulerBackend.kubernetesClient).thenReturn(kubernetesClient)
     when(kubernetesClient.pods()).thenReturn(podOperations)
     when(podOperations.inNamespace(namespace)).thenReturn(podsWithNamespace)
     when(podsWithNamespace.withLabel(SPARK_APP_ID_LABEL, appId)).thenReturn(labeledPods)
@@ -73,15 +79,9 @@ class ExecutorResizePluginSuite
 
   private def createPlugin(): ExecutorResizeDriverPlugin = {
     val plugin = new ExecutorResizeDriverPlugin()
-    // Use reflection to set private fields
     val scField = plugin.getClass.getDeclaredField("sparkContext")
     scField.setAccessible(true)
     scField.set(plugin, sparkContext)
-
-    val clientField = plugin.getClass.getDeclaredField("kubernetesClient")
-    clientField.setAccessible(true)
-    clientField.set(plugin, kubernetesClient)
-
     plugin
   }
 
@@ -124,7 +124,7 @@ class ExecutorResizePluginSuite
     val plugin = createPlugin()
     when(podList.getItems).thenReturn(Collections.emptyList())
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podMetricOperations, never()).metrics(anyString(), anyString())
   }
@@ -142,7 +142,7 @@ class ExecutorResizePluginSuite
 
     when(podList.getItems).thenReturn(Collections.singletonList(pod))
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podMetricOperations, never()).metrics(anyString(), anyString())
   }
@@ -158,7 +158,7 @@ class ExecutorResizePluginSuite
     val podResource = mock(classOf[SINGLE_POD])
     when(podsWithNamespace.withName("spark-executor-1")).thenReturn(podResource)
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podResource, never()).patch(any(), any(classOf[Pod]))
   }
@@ -175,7 +175,7 @@ class ExecutorResizePluginSuite
     when(podsWithNamespace.withName("spark-executor-1")).thenReturn(podResource)
     when(podResource.subresource(anyString())).thenReturn(podResource)
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podResource, times(1)).patch(any(), any(classOf[Pod]))
   }
@@ -191,7 +191,7 @@ class ExecutorResizePluginSuite
     val podResource = mock(classOf[SINGLE_POD])
     when(podsWithNamespace.withName("spark-executor-1")).thenReturn(podResource)
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podResource, never()).patch(any(), any(classOf[Pod]))
   }
@@ -223,7 +223,7 @@ class ExecutorResizePluginSuite
     val podResource = mock(classOf[SINGLE_POD])
     when(podsWithNamespace.withName("spark-executor-1")).thenReturn(podResource)
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podResource, never()).patch(any(), any(classOf[Pod]))
   }
@@ -246,7 +246,7 @@ class ExecutorResizePluginSuite
     when(podsWithNamespace.withName("spark-executor-2")).thenReturn(podResource2)
     when(podResource2.subresource(anyString())).thenReturn(podResource2)
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podResource1, never()).patch(any(), any(classOf[Pod]))
     verify(podResource2, times(1)).patch(any(), any(classOf[Pod]))
@@ -265,7 +265,7 @@ class ExecutorResizePluginSuite
     when(podResource.subresource(anyString())).thenReturn(podResource)
 
     // Use 50% threshold - 60% usage should trigger resize
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.5, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.5, 0.1, kubernetesClient))
 
     verify(podResource, times(1)).patch(any(), any(classOf[Pod]))
   }
@@ -283,8 +283,22 @@ class ExecutorResizePluginSuite
     when(podsWithNamespace.withName("spark-executor-1")).thenReturn(podResource)
     when(podResource.subresource(anyString())).thenReturn(podResource)
 
-    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1))
+    plugin.invokePrivate(_checkAndIncreaseMemory(namespace, 0.9, 0.1, kubernetesClient))
 
     verify(podResource, times(1)).patch(any(), any(classOf[Pod]))
   }
+
+  Seq("statefulset", "deployment").foreach { allocator =>
+    test(s"init returns early when pods allocator is '$allocator'") {
+      val plugin = new ExecutorResizeDriverPlugin()
+      val sparkConf = new SparkConf().set(KUBERNETES_ALLOCATION_PODS_ALLOCATOR, allocator)
+      val sc = mock(classOf[SparkContext])
+      when(sc.conf).thenReturn(sparkConf)
+      val pluginCtx = mock(classOf[PluginContext])
+
+      val result = plugin.init(sc, pluginCtx)
+
+      assert(result.isEmpty)
+    }
+  }
 }
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala
index e24ffe9ef52b6..cf172eb096d47 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala
@@ -160,6 +160,10 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
     verify(configMapResource).create()
   }
 
+  test("SPARK-56684: kubernetesClient is exposed within the k8s package") {
+    assert(schedulerBackendUnderTest.kubernetesClient eq kubernetesClient)
+  }
+
   test("Stop all components") {
     when(podsWithNamespace.withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID)).thenReturn(labeledPods)
     when(labeledPods.withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)).thenReturn(labeledPods)
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
index 84cba5568d27c..4af2359237d36 100644
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 ARG java_image_name=azul/zulu-openjdk
-ARG java_image_tag=21-jre
+ARG java_image_tag=25-jre
 
 FROM ${java_image_name}:${java_image_tag}
 LABEL org.opencontainers.image.authors="Apache Spark project <dev@spark.apache.org>"
diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md
index 9b30383d87620..7a7ab5ebd11ec 100644
--- a/resource-managers/kubernetes/integration-tests/README.md
+++ b/resource-managers/kubernetes/integration-tests/README.md
@@ -336,11 +336,11 @@ You can also specify your specific dockerfile to build JVM/Python/R based image
 
 ## Requirements
 - A minimum of 6 CPUs and 9G of memory is required to complete all Volcano test cases.
-- Volcano v1.14.1.
+- Volcano v1.14.2.
 
 ## Installation
 
-    kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.1/installer/volcano-development.yaml
+    kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.2/installer/volcano-development.yaml
 
 ## Run tests
 
@@ -361,5 +361,5 @@ You can also specify `volcano` tag to only run Volcano test:
 
 ## Cleanup Volcano
 
-    kubectl delete -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.1/installer/volcano-development.yaml
+    kubectl delete -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.2/installer/volcano-development.yaml
 
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index abddd83765a50..ee806afeaba7f 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index 5ba3648f071f2..aecf6a1ceb626 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index 9389a13e292f8..3018cb8ed739a 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -20,6 +20,7 @@ package org.apache.spark.scheduler.cluster
 import java.io.InterruptedIOException
 
 import scala.collection.mutable.ArrayBuffer
+import scala.util.control.NonFatal
 
 import org.apache.hadoop.yarn.api.records.{FinalApplicationStatus, YarnApplicationState}
 
@@ -135,6 +136,15 @@ private[spark] class YarnClientSchedulerBackend(
       } catch {
         case _: InterruptedException | _: InterruptedIOException =>
           logInfo("Interrupting monitor thread")
+        case NonFatal(e) =>
+          logError(log"Unexpected error in YARN application state monitor thread.", e)
+          allowInterrupt = false
+          sc.stop(1)
+          if (conf.get(AM_CLIENT_MODE_EXIT_ON_ERROR)) {
+            logWarning(log"SparkContext stopped due to unexpected error, " +
+              log"exiting with code 1.")
+            System.exit(1)
+          }
       }
     }
 
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackendSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackendSuite.scala
new file mode 100644
index 0000000000000..da231bd2d7078
--- /dev/null
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackendSuite.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.scheduler.cluster
+
+import java.util.concurrent.{CountDownLatch, TimeUnit}
+
+import org.mockito.ArgumentMatchers.{anyBoolean, anyLong}
+import org.mockito.Mockito.{mock, when}
+
+import org.apache.spark._
+import org.apache.spark.deploy.yarn.Client
+import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd, TaskSchedulerImpl}
+
+class YarnClientSchedulerBackendSuite extends SparkFunSuite with LocalSparkContext {
+
+  test("SPARK-57191: MonitorThread calls sc.stop() on unexpected exception") {
+    val stopCalled = new CountDownLatch(1)
+    sc = new SparkContext("local", "test", new SparkConf().set("spark.testing", "true"))
+    sc.addSparkListener(new SparkListener {
+      override def onApplicationEnd(e: SparkListenerApplicationEnd): Unit =
+        stopCalled.countDown()
+    })
+
+    val backend = new YarnClientSchedulerBackend(
+      sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc)
+
+    // Simulate MonitorThread hitting an unexpected non-fatal error
+    // (e.g., credential expiry, network failure)
+    val mockClient = mock(classOf[Client])
+    when(mockClient.monitorApplication(anyBoolean(), anyBoolean(), anyLong()))
+      .thenThrow(new RuntimeException("Simulated failure"))
+
+    // Use reflection since client/appId are private and MonitorThread is an inner class
+    val clientField = backend.getClass.getDeclaredFields.find(_.getName.endsWith("client")).get
+    clientField.setAccessible(true)
+    clientField.set(backend, mockClient)
+
+    val appIdField = classOf[YarnSchedulerBackend].getDeclaredField("appId")
+    appIdField.setAccessible(true)
+    appIdField.set(backend,
+      Some(org.apache.hadoop.yarn.api.records.ApplicationId.newInstance(0L, 1)))
+
+    val monitorMethod = backend.getClass.getDeclaredMethod("asyncMonitorApplication")
+    monitorMethod.setAccessible(true)
+    val monitorThread = monitorMethod.invoke(backend).asInstanceOf[Thread]
+
+    // Assign to backend.monitorThread so the full shutdown path is exercised:
+    // sc.stop() -> YarnClientSchedulerBackend.stop() -> monitorThread.stopMonitor()
+    val monitorField = backend.getClass.getDeclaredFields
+      .find(_.getName.endsWith("monitorThread")).get
+    monitorField.setAccessible(true)
+    monitorField.set(backend, monitorThread)
+
+    monitorThread.start()
+
+    assert(stopCalled.await(10, TimeUnit.SECONDS),
+      "sc.stop() was not called after MonitorThread hit an unexpected exception")
+  }
+}
diff --git a/sql/api/pom.xml b/sql/api/pom.xml
index 52ae8b44c2134..9c8a28c179f7a 100644
--- a/sql/api/pom.xml
+++ b/sql/api/pom.xml
@@ -22,7 +22,7 @@
     <parent>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-parent_2.13</artifactId>
-        <version>4.2.0.1-4.3.0-0</version>
+        <version>4.2.0.1-4.3.0-1</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
 
@@ -107,4 +107,4 @@
             </plugin>
         </plugins>
     </build>
-</project>
\ No newline at end of file
+</project>
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index 59a0034f922e4..af71f441012c1 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -139,6 +139,7 @@ AND: 'AND';
 ANTI: 'ANTI';
 ANY: 'ANY';
 ANY_VALUE: 'ANY_VALUE';
+APPROX: 'APPROX';
 ARCHIVE: 'ARCHIVE';
 ARRAY: 'ARRAY' {incComplexTypeLevelCounter();};
 AS: 'AS';
@@ -148,6 +149,7 @@ AT: 'AT';
 ATOMIC: 'ATOMIC';
 AUTHORIZATION: 'AUTHORIZATION';
 BEGIN: 'BEGIN';
+BERNOULLI: 'BERNOULLI';
 BETWEEN: 'BETWEEN';
 BIGINT: 'BIGINT';
 BINARY: 'BINARY';
@@ -234,6 +236,7 @@ DETERMINISTIC: 'DETERMINISTIC';
 DFS: 'DFS';
 DIRECTORIES: 'DIRECTORIES';
 DIRECTORY: 'DIRECTORY';
+DISTANCE: 'DISTANCE';
 DISTINCT: 'DISTINCT';
 DISTRIBUTE: 'DISTRIBUTE';
 DIV: 'DIV';
@@ -247,6 +250,7 @@ ENFORCED: 'ENFORCED';
 ESCAPE: 'ESCAPE';
 ESCAPED: 'ESCAPED';
 EVOLUTION: 'EVOLUTION';
+EXACT: 'EXACT';
 EXCEPT: 'EXCEPT';
 EXCHANGE: 'EXCHANGE';
 EXCLUDE: 'EXCLUDE';
@@ -366,6 +370,7 @@ NAMESPACES: 'NAMESPACES';
 NANOSECOND: 'NANOSECOND';
 NANOSECONDS: 'NANOSECONDS';
 NATURAL: 'NATURAL';
+NEAREST: 'NEAREST';
 NEXT: 'NEXT';
 NO: 'NO';
 NONE: 'NONE';
@@ -456,6 +461,7 @@ SETMINUS: 'MINUS';
 SETS: 'SETS';
 SHORT: 'SHORT';
 SHOW: 'SHOW';
+SIMILARITY: 'SIMILARITY';
 SINGLE: 'SINGLE';
 SKEWED: 'SKEWED';
 SMALLINT: 'SMALLINT';
@@ -478,6 +484,7 @@ STRUCT: 'STRUCT' {incComplexTypeLevelCounter();};
 SUBSTR: 'SUBSTR';
 SUBSTRING: 'SUBSTRING';
 SYNC: 'SYNC';
+SYSTEM: 'SYSTEM';
 SYSTEM_TIME: 'SYSTEM_TIME';
 SYSTEM_VERSION: 'SYSTEM_VERSION';
 SYSTEM_PATH: 'SYSTEM_PATH';
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 1a0382dbe10c4..5761028f60234 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -217,6 +217,10 @@ singleTableSchema
     : colTypeList EOF
     ;
 
+singlePathElementList
+    : pathElement (COMMA pathElement)* EOF
+    ;
+
 singleRoutineParamList
     : colDefinitionList EOF
     ;
@@ -1049,7 +1053,7 @@ relationExtension
     ;
 
 joinRelation
-    : (joinType) JOIN LATERAL? right=relationPrimary joinCriteria?
+    : (joinType) JOIN LATERAL? right=relationPrimary (joinCriteria | nearestByClause)?
     | NATURAL joinType JOIN LATERAL? right=relationPrimary
     ;
 
@@ -1068,8 +1072,14 @@ joinCriteria
     | USING identifierList
     ;
 
+nearestByClause
+    : (APPROX | EXACT) NEAREST num=INTEGER_VALUE? BY (DISTANCE | SIMILARITY) expression
+    ;
+
 sample
-    : TABLESAMPLE LEFT_PAREN sampleMethod? RIGHT_PAREN (REPEATABLE LEFT_PAREN seed=integerValue RIGHT_PAREN)?
+    : TABLESAMPLE (sampleType=(SYSTEM | BERNOULLI))?
+      LEFT_PAREN sampleMethod? RIGHT_PAREN
+      (REPEATABLE LEFT_PAREN seed=integerValue RIGHT_PAREN)?
     ;
 
 sampleMethod
@@ -1930,6 +1940,7 @@ ansiNonReserved
     | ANALYZE
     | ANTI
     | ANY_VALUE
+    | APPROX
     | ARCHIVE
     | ARRAY
     | ASC
@@ -1937,6 +1948,7 @@ ansiNonReserved
     | AT
     | ATOMIC
     | BEGIN
+    | BERNOULLI
     | BETWEEN
     | BIGINT
     | BINARY
@@ -2006,6 +2018,7 @@ ansiNonReserved
     | DFS
     | DIRECTORIES
     | DIRECTORY
+    | DISTANCE
     | DISTRIBUTE
     | DIV
     | DO
@@ -2015,6 +2028,7 @@ ansiNonReserved
     | ENFORCED
     | ESCAPED
     | EVOLUTION
+    | EXACT
     | EXCHANGE
     | EXCLUDE
     | EXCLUSIVE
@@ -2112,6 +2126,7 @@ ansiNonReserved
     | NAMESPACES
     | NANOSECOND
     | NANOSECONDS
+    | NEAREST
     | NEXT
     | NO
     | NONE
@@ -2187,6 +2202,7 @@ ansiNonReserved
     | SETS
     | SHORT
     | SHOW
+    | SIMILARITY
     | SINGLE
     | SKEWED
     | SMALLINT
@@ -2207,6 +2223,7 @@ ansiNonReserved
     | SUBSTR
     | SUBSTRING
     | SYNC
+    | SYSTEM
     | SYSTEM_PATH
     | SYSTEM_TIME
     | SYSTEM_VERSION
@@ -2303,6 +2320,7 @@ nonReserved
     | AND
     | ANY
     | ANY_VALUE
+    | APPROX
     | ARCHIVE
     | ARRAY
     | AS
@@ -2312,6 +2330,7 @@ nonReserved
     | ATOMIC
     | AUTHORIZATION
     | BEGIN
+    | BERNOULLI
     | BETWEEN
     | BIGINT
     | BINARY
@@ -2398,6 +2417,7 @@ nonReserved
     | DFS
     | DIRECTORIES
     | DIRECTORY
+    | DISTANCE
     | DISTINCT
     | DISTRIBUTE
     | DIV
@@ -2411,6 +2431,7 @@ nonReserved
     | ESCAPE
     | ESCAPED
     | EVOLUTION
+    | EXACT
     | EXCHANGE
     | EXCLUDE
     | EXCLUSIVE
@@ -2523,6 +2544,7 @@ nonReserved
     | NAMESPACES
     | NANOSECOND
     | NANOSECONDS
+    | NEAREST
     | NEXT
     | NO
     | NONE
@@ -2609,6 +2631,7 @@ nonReserved
     | SETS
     | SHORT
     | SHOW
+    | SIMILARITY
     | SINGLE
     | SKEWED
     | SMALLINT
@@ -2631,6 +2654,7 @@ nonReserved
     | SUBSTR
     | SUBSTRING
     | SYNC
+    | SYSTEM
     | SYSTEM_PATH
     | SYSTEM_TIME
     | SYSTEM_VERSION
diff --git a/sql/api/src/main/java/org/apache/spark/sql/streaming/Trigger.java b/sql/api/src/main/java/org/apache/spark/sql/streaming/Trigger.java
index 8536df1ec74f6..99eaac789bffa 100644
--- a/sql/api/src/main/java/org/apache/spark/sql/streaming/Trigger.java
+++ b/sql/api/src/main/java/org/apache/spark/sql/streaming/Trigger.java
@@ -22,7 +22,6 @@
 import scala.concurrent.duration.Duration;
 
 import org.apache.spark.annotation.Evolving;
-import org.apache.spark.annotation.Experimental;
 import org.apache.spark.sql.execution.streaming.AvailableNowTrigger$;
 import org.apache.spark.sql.execution.streaming.ContinuousTrigger;
 import org.apache.spark.sql.execution.streaming.OneTimeTrigger$;
@@ -183,7 +182,6 @@ public static Trigger Continuous(String interval) {
    * A trigger for real time mode, with batch at the specified duration.
    *
    */
-  @Experimental
   public static Trigger RealTime(long batchDurationMs) {
     return RealTimeTrigger.apply(batchDurationMs);
   }
@@ -192,7 +190,6 @@ public static Trigger RealTime(long batchDurationMs) {
    * A trigger for real time mode, with batch at the specified duration.
    *
    */
-  @Experimental
   public static Trigger RealTime(long batchDuration, TimeUnit timeUnit) {
     return RealTimeTrigger.create(batchDuration, timeUnit);
   }
@@ -205,7 +202,6 @@ public static Trigger RealTime(long batchDuration, TimeUnit timeUnit) {
    *    df.writeStream.trigger(Trigger.RealTime(10.seconds))
    * }}}
    */
-  @Experimental
   public static Trigger RealTime(Duration batchDuration) {
     return RealTimeTrigger.apply(batchDuration);
   }
@@ -217,7 +213,6 @@ public static Trigger RealTime(Duration batchDuration) {
    *    df.writeStream.trigger(Trigger.RealTime("10 seconds"))
    * }}}
    */
-  @Experimental
   public static Trigger RealTime(String batchDuration) {
     return RealTimeTrigger.apply(batchDuration);
   }
@@ -226,7 +221,6 @@ public static Trigger RealTime(String batchDuration) {
    * A trigger for real time mode, with batch at the specified duration. The default duration is 5
    * minutes.
    */
-  @Experimental
   public static Trigger RealTime() {
     return RealTimeTrigger.apply();
   }
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 0bde59155f3f3..4ccca9eabe933 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -20,7 +20,7 @@ import java.util.{Locale, Map, Properties}
 
 import scala.jdk.CollectionConverters._
 
-import org.apache.spark.annotation.Stable
+import org.apache.spark.annotation.{Experimental, Stable}
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.errors.CompilationErrors
 
@@ -216,6 +216,18 @@ abstract class DataFrameWriter[T] {
     this
   }
 
+  /**
+   * Enable automatic schema evolution for this write. The target table must declare the
+   * `AUTOMATIC_SCHEMA_EVOLUTION` capability.
+   *
+   * @since 4.2.0
+   */
+  @Experimental
+  def withSchemaEvolution(): this.type = {
+    this._withSchemaEvolution = true
+    this
+  }
+
   /**
    * Saves the content of the `DataFrame` at the specified path.
    *
@@ -515,4 +527,6 @@ abstract class DataFrameWriter[T] {
   protected var sortColumnNames: Option[Seq[String]] = None
 
   protected var clusteringColumns: Option[Seq[String]] = None
+
+  protected var _withSchemaEvolution: Boolean = false
 }
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala
index 66a4b4232a22d..852680465c138 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala
@@ -30,6 +30,20 @@ import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableExceptio
 @Experimental
 abstract class DataFrameWriterV2[T] extends CreateTableWriter[T] {
 
+  private[sql] var _withSchemaEvolution: Boolean = false
+
+  /**
+   * Enable automatic schema evolution for this write. The target table must declare the
+   * `AUTOMATIC_SCHEMA_EVOLUTION` capability.
+   *
+   * @since 4.2.0
+   */
+  @Experimental
+  def withSchemaEvolution(): this.type = {
+    this._withSchemaEvolution = true
+    this
+  }
+
   /** @inheritdoc */
   override def using(provider: String): this.type
 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/api/src/main/scala/org/apache/spark/sql/Dataset.scala
index c3c983c17bb02..38765262e1fc5 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -912,6 +912,76 @@ abstract class Dataset[T] extends Serializable {
    */
   def lateralJoin(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame
 
+  /**
+   * Nearest-by top-K ranking join with another `DataFrame`, using the default `inner` join type.
+   * For each row on the left (query side), returns up to `numResults` rows from `right` (base
+   * side), ranked by `rankingExpression`.
+   *
+   * Equivalent SQL (with `mode = "exact"` and `direction = "similarity"`):
+   * {{{
+   *   left INNER JOIN right EXACT NEAREST numResults BY SIMILARITY rankingExpression
+   * }}}
+   *
+   * The current implementation evaluates the full cross-product of left and right and bounds
+   * memory per left row by `numResults`. Index-backed approximate strategies (transparent to
+   * `approx` mode) are planned for a future release; until then, pre-filter the right side when
+   * it is large. Tie-breaking among rows with equal ranking values is unspecified.
+   *
+   * @param right
+   *   Right (base side) of the join - the candidate pool searched for each row of this Dataset.
+   * @param rankingExpression
+   *   Scalar expression used to rank candidate rows.
+   * @param numResults
+   *   Maximum number of matches per query row. Must be between 1 and 100000.
+   * @param mode
+   *   Search algorithm contract. Must be one of: `approx`, `exact`. `approx` allows the optimizer
+   *   to use indexed or other approximate strategies when available; `exact` forces brute-force
+   *   evaluation and requires the ranking expression to be deterministic.
+   * @param direction
+   *   `"distance"` (smallest value first) or `"similarity"` (largest value first).
+   * @group untypedrel
+   * @since 4.2.0
+   */
+  def nearestByJoin(
+      right: Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      mode: String,
+      direction: String): DataFrame
+
+  /**
+   * Nearest-by top-K ranking join with another `DataFrame`.
+   *
+   * The current implementation evaluates the full cross-product of left and right and bounds
+   * memory per left row by `numResults`. Index-backed approximate strategies (transparent to
+   * `approx` mode) are planned for a future release; until then, pre-filter the right side when
+   * it is large. Tie-breaking among rows with equal ranking values is unspecified.
+   *
+   * @param right
+   *   Right (base side) of the join - the candidate pool searched for each row of this Dataset.
+   * @param rankingExpression
+   *   Scalar expression used to rank candidate rows.
+   * @param numResults
+   *   Maximum number of matches per query row. Must be between 1 and 100000.
+   * @param mode
+   *   Search algorithm contract. Must be one of: `approx`, `exact`. `approx` allows the optimizer
+   *   to use indexed or other approximate strategies when available; `exact` forces brute-force
+   *   evaluation and requires the ranking expression to be deterministic.
+   * @param direction
+   *   `"distance"` (smallest value first) or `"similarity"` (largest value first).
+   * @param joinType
+   *   Type of join to perform. Must be one of: `inner`, `leftouter`.
+   * @group untypedrel
+   * @since 4.2.0
+   */
+  def nearestByJoin(
+      right: Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      mode: String,
+      direction: String,
+      joinType: String): DataFrame
+
   protected def sortInternal(global: Boolean, sortExprs: Seq[Column]): Dataset[T]
 
   /**
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala
index 7ea5fe9bfb102..4cf7c324a4061 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala
@@ -61,20 +61,21 @@ abstract class RuntimeConfig {
 
   /**
    * Returns the value of Spark runtime configuration property for the given key. If the key is
-   * not set yet, return its default value if possible, otherwise `NoSuchElementException` will be
-   * thrown.
+   * not explicitly set, return its built-in default value if one exists, otherwise
+   * `NoSuchElementException` will be thrown.
    *
    * @throws java.util.NoSuchElementException
-   *   if the key is not set and does not have a default value
+   *   if the key is not set and does not have a built-in default value
    * @since 2.0.0
    */
-  @throws[NoSuchElementException]("if the key is not set and there is no default value")
+  @throws[NoSuchElementException]("if the key is not set and there is no built-in default value")
   def get(key: String): String
 
   /**
    * Returns the value of Spark runtime configuration property for the given key. If the key is
-   * not set yet, return the user given `default`. This is useful when its default value defined
-   * by Apache Spark is not the desired one.
+   * not explicitly set, return the user given `default` instead of the key's built-in default
+   * value (if any). This is useful when the built-in default value defined by Apache Spark is not
+   * the desired one.
    *
    * @since 2.0.0
    */
@@ -113,7 +114,8 @@ abstract class RuntimeConfig {
 
   /**
    * Returns the value of Spark runtime configuration property for the given key. If the key is
-   * not set yet, return its default value if possible, otherwise `None` will be returned.
+   * not explicitly set, return `Some` of its built-in default value if one exists, otherwise
+   * `None` will be returned.
    *
    * @since 2.0.0
    */
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala
index 76442accbd35b..e42578deaa1e4 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, NullType, StructType, UserDefinedType, VariantType}
+import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, GeographyType, GeometryType, NullType, StructType, UserDefinedType, VariantType}
 
 object OrderUtils {
 
@@ -26,6 +26,10 @@ object OrderUtils {
   def isOrderable(dataType: DataType): Boolean = dataType match {
     case NullType => true
     case VariantType => false
+    // GEOMETRY and GEOGRAPHY are atomic types backed by opaque BinaryView bytes that have no
+    // meaningful ordering, so reject them before the AtomicType case below.
+    case _: GeometryType => false
+    case _: GeographyType => false
     case dt: AtomicType => true
     case struct: StructType => struct.fields.forall(f => isOrderable(f.dataType))
     case array: ArrayType => isOrderable(array.elementType)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/plans/NearestByJoinValidation.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/plans/NearestByJoinValidation.scala
new file mode 100644
index 0000000000000..8ebac8e73c671
--- /dev/null
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/plans/NearestByJoinValidation.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.plans
+
+/**
+ * Acceptance lists for the `NEAREST BY` join API.
+ */
+private[sql] object NearestByJoinValidation {
+
+  /** Upper bound on `numResults`. Mirrors the K-overload limit of `MaxMinByK`. */
+  val MaxNumResults: Int = 100000
+
+  /**
+   * Strings accepted by `joinType` after lower-casing and stripping `_` (so e.g. `LEFT_OUTER`
+   * canonicalizes to `leftouter`). Every consumer must apply the same canonicalization before
+   * checking membership.
+   */
+  val SupportedJoinTypes: Seq[String] = Seq("inner", "leftouter", "left")
+
+  /** Display form for `supported` in `NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE` error messages. */
+  val SupportedJoinTypeDisplay: String = "'INNER', 'LEFT OUTER'"
+
+  /** Strings accepted by `mode`. Lower-cased before membership check. */
+  val SupportedModes: Seq[String] = Seq("approx", "exact")
+
+  /** Strings accepted by `direction`. Lower-cased before membership check. */
+  val SupportedDirections: Seq[String] = Seq("distance", "similarity")
+}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 69ff4c9cd108f..a3cbf8753f70a 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -72,6 +72,13 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
       ctx)
   }
 
+  def insertReplaceWhereTableAliasNotAllowed(ctx: TableAliasContext): Throwable = {
+    new ParseException(
+      errorClass = "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED",
+      messageParameters = Map.empty,
+      ctx)
+  }
+
   def columnAliasInOperationNotAllowedError(op: String, ctx: TableAliasContext): Throwable = {
     new ParseException(
       errorClass = "COLUMN_ALIASES_NOT_ALLOWED",
@@ -203,6 +210,33 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
       ctx)
   }
 
+  def nearestByJoinWithLateralUnsupportedError(ctx: ParserRuleContext): Throwable = {
+    new ParseException(
+      errorClass = "UNSUPPORTED_FEATURE.LATERAL_JOIN_NEAREST_BY",
+      messageParameters = Map.empty,
+      ctx)
+  }
+
+  def unsupportedNearestByJoinTypeError(
+      ctx: ParserRuleContext,
+      joinType: String,
+      supported: String): Throwable = {
+    new ParseException(
+      errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+      messageParameters = Map("joinType" -> toSQLStmt(joinType), "supported" -> supported),
+      ctx)
+  }
+
+  def nearestByJoinNumResultsOutOfRangeError(
+      ctx: ParserRuleContext,
+      numResults: String,
+      max: Int): Throwable = {
+    new ParseException(
+      errorClass = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+      messageParameters = Map("numResults" -> numResults, "min" -> "1", "max" -> max.toString),
+      ctx)
+  }
+
   def repetitiveWindowDefinitionError(name: String, ctx: WindowClauseContext): Throwable = {
     new ParseException(
       errorClass = "INVALID_SQL_SYNTAX.REPETITIVE_WINDOW_DEFINITION",
@@ -482,6 +516,22 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
       ctx)
   }
 
+  def tableSampleSystemRepeatableError(ctx: ParserRuleContext): Throwable = {
+    new ParseException(
+      errorClass = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_REPEATABLE",
+      messageParameters = Map.empty,
+      ctx)
+  }
+
+  def tableSampleSystemSampleMethodError(
+      sampleMethod: String,
+      ctx: ParserRuleContext): Throwable = {
+    new ParseException(
+      errorClass = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_SAMPLE_METHOD",
+      messageParameters = Map("sampleMethod" -> sampleMethod),
+      ctx)
+  }
+
   def invalidStatementError(operation: String, ctx: ParserRuleContext): Throwable = {
     new ParseException(
       errorClass = "INVALID_STATEMENT_OR_CLAUSE",
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala b/sql/api/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala
index ea7100fcc23a8..cdd6e60a3dfcc 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala
@@ -24,7 +24,6 @@ import scala.concurrent.duration.{Duration, MINUTES}
 import org.json4s.DefaultFormats
 
 import org.apache.spark.SparkIllegalArgumentException
-import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_DAY
 import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils.microsToMillis
 import org.apache.spark.sql.catalyst.util.SparkIntervalUtils
@@ -123,14 +122,12 @@ object ContinuousTrigger {
  * @param batchDurationMs
  *   The duration of each batch in milliseconds. This must be strictly positive.
  */
-@Experimental
 case class RealTimeTrigger(batchDurationMs: Long) extends Trigger {
   require(batchDurationMs > 0, "the batch duration should not be negative")
 
   implicit val defaultFormats: DefaultFormats = DefaultFormats
 }
 
-@Experimental
 object RealTimeTrigger {
   import Triggers._
 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index b3bd22e6323b5..ce81d59439749 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2032,7 +2032,7 @@ object functions {
    * sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_bigint(e: Column, k: Column): Column =
     Column.fn("kll_merge_agg_bigint", e, k)
@@ -2044,7 +2044,7 @@ object functions {
    * sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_bigint(e: Column, k: Int): Column =
     Column.fn("kll_merge_agg_bigint", e, lit(k))
@@ -2056,7 +2056,7 @@ object functions {
    * sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_bigint(columnName: String, k: Int): Column =
     kll_merge_agg_bigint(Column(columnName), k)
@@ -2067,7 +2067,7 @@ object functions {
    * sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_bigint(e: Column): Column =
     Column.fn("kll_merge_agg_bigint", e)
@@ -2078,7 +2078,7 @@ object functions {
    * sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_bigint(columnName: String): Column =
     kll_merge_agg_bigint(Column(columnName))
@@ -2089,7 +2089,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_float(e: Column, k: Column): Column =
     Column.fn("kll_merge_agg_float", e, k)
@@ -2100,7 +2100,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_float(e: Column, k: Int): Column =
     Column.fn("kll_merge_agg_float", e, lit(k))
@@ -2111,7 +2111,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_float(columnName: String, k: Int): Column =
     kll_merge_agg_float(Column(columnName), k)
@@ -2121,7 +2121,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_float(e: Column): Column =
     Column.fn("kll_merge_agg_float", e)
@@ -2131,7 +2131,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_float(columnName: String): Column =
     kll_merge_agg_float(Column(columnName))
@@ -2142,7 +2142,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_double(e: Column, k: Column): Column =
     Column.fn("kll_merge_agg_double", e, k)
@@ -2153,7 +2153,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_double(e: Column, k: Int): Column =
     Column.fn("kll_merge_agg_double", e, lit(k))
@@ -2164,7 +2164,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_double(columnName: String, k: Int): Column =
     kll_merge_agg_double(Column(columnName), k)
@@ -2174,7 +2174,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_double(e: Column): Column =
     Column.fn("kll_merge_agg_double", e)
@@ -2184,7 +2184,7 @@ object functions {
    * If k is not specified, the merged sketch adopts the k value from the first input sketch.
    *
    * @group agg_funcs
-   * @since 4.1.0
+   * @since 4.1.2
    */
   def kll_merge_agg_double(columnName: String): Column =
     kll_merge_agg_double(Column(columnName))
@@ -8486,6 +8486,40 @@ object functions {
   def timestamp_add(unit: String, quantity: Column, ts: Column): Column =
     Column.internalFn("timestampadd", lit(unit), quantity, ts)
 
+  /**
+   * Returns the start of the fixed-size bucket of `bucketSize` that contains `ts`, with buckets
+   * aligned to the default origin (1970-01-01 00:00:00). For `TIMESTAMP_NTZ`, bucketing is
+   * performed in UTC. For `TIMESTAMP`, year-month interval buckets and calendar-day components of
+   * day-time interval buckets align to the session time zone.
+   *
+   * @param bucketSize
+   *   A day-time or year-month interval defining the bucket size. Must be positive and foldable.
+   * @param ts
+   *   A TIMESTAMP or TIMESTAMP_NTZ value to bucket.
+   * @group datetime_funcs
+   * @since 4.2.0
+   */
+  def time_bucket(bucketSize: Column, ts: Column): Column =
+    Column.fn("time_bucket", bucketSize, ts)
+
+  /**
+   * Returns the start of the fixed-size bucket of `bucketSize` that contains `ts`, with buckets
+   * aligned to `origin`. For `TIMESTAMP_NTZ`, bucketing is performed in UTC. For `TIMESTAMP`,
+   * year-month interval buckets and calendar-day components of day-time interval buckets align to
+   * the session time zone.
+   *
+   * @param bucketSize
+   *   A day-time or year-month interval defining the bucket size. Must be positive and foldable.
+   * @param ts
+   *   A TIMESTAMP or TIMESTAMP_NTZ value to bucket.
+   * @param origin
+   *   Alignment anchor. Must be the same type as `ts` and must be foldable.
+   * @group datetime_funcs
+   * @since 4.2.0
+   */
+  def time_bucket(bucketSize: Column, ts: Column, origin: Column): Column =
+    Column.fn("time_bucket", bucketSize, ts, origin)
+
   /**
    * Returns the difference between two times, measured in specified units. Throws a
    * SparkIllegalArgumentException, in case the specified unit is not supported.
@@ -9551,6 +9585,17 @@ object functions {
    */
   def is_variant_null(v: Column): Column = Column.fn("is_variant_null", v)
 
+  /**
+   * Check if a variant value is valid. Returns true if the variant is valid, false if it is
+   * malformed, and NULL if the input is NULL.
+   *
+   * @param v
+   *   a variant column.
+   * @group variant_funcs
+   * @since 4.2.0
+   */
+  def is_valid_variant(v: Column): Column = Column.fn("is_valid_variant", v)
+
   /**
    * Extracts a sub-variant from `v` according to `path` string, and then cast the sub-variant to
    * `targetType`. Returns null if the path does not exist. Throws an exception if the cast fails.
@@ -11121,6 +11166,24 @@ object functions {
   def st_asbinary(geo: Column): Column =
     Column.fn("st_asbinary", geo)
 
+  /**
+   * Returns the input GEOGRAPHY or GEOMETRY value in WKB format using the specified endianness.
+   *
+   * @group st_funcs
+   * @since 4.2.0
+   */
+  def st_asbinary(geo: Column, endianness: Column): Column =
+    Column.fn("st_asbinary", geo, endianness)
+
+  /**
+   * Returns the input GEOGRAPHY or GEOMETRY value in WKB format using the specified endianness.
+   *
+   * @group st_funcs
+   * @since 4.2.0
+   */
+  def st_asbinary(geo: Column, endianness: String): Column =
+    Column.fn("st_asbinary", geo, lit(endianness))
+
   /**
    * Parses the WKB description of a geography and returns the corresponding GEOGRAPHY value.
    *
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index dc7d2b94576ea..d4d8f162485ff 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -17,9 +17,8 @@
 package org.apache.spark.sql.streaming
 
 import scala.jdk.CollectionConverters._
-import scala.util.matching.Regex
 
-import org.apache.spark.annotation.Evolving
+import org.apache.spark.annotation.{Evolving, Experimental}
 import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders}
 import org.apache.spark.sql.types.StructType
 
@@ -104,6 +103,17 @@ abstract class DataStreamReader {
     this
   }
 
+  /**
+   * Specifies a name for the streaming source. This name is used to identify the source in
+   * checkpoint metadata and enables stable checkpoint locations for source evolution.
+   *
+   * @param sourceName
+   *   the name to assign to this streaming source
+   * @since 4.2.0
+   */
+  @Experimental
+  def name(sourceName: String): this.type
+
   /**
    * Loads input data stream in as a `DataFrame`, for data streams that don't require a path (e.g.
    * external key-value stores).
@@ -131,6 +141,46 @@ abstract class DataStreamReader {
    *     .changes("my_table")
    * }}}
    *
+   * Streaming reads support all of the same post-processing as batch reads -- `computeUpdates`,
+   * `deduplicationMode = dropCarryovers`, and `deduplicationMode = netChanges`. The streaming
+   * netChanges path holds per-row-identity state in the state store and emits the SPIP collapse
+   * output once the global watermark advances past the last `_commit_timestamp` observed for that
+   * row identity. Row identities only touched in the latest observed commit are therefore not
+   * emitted until a later commit (with strictly greater `_commit_timestamp`) advances the
+   * watermark past them, or the source terminates.
+   *
+   * Streaming netChanges differs from batch netChanges in scope. Batch netChanges collapses all
+   * changes for a row identity over the entire requested version range. Streaming netChanges is
+   * incremental: it collapses changes that fall within a single watermark window for a row
+   * identity (i.e. up to the timer firing that emits its current net result). After a row
+   * identity's net result has been emitted, subsequent commits on the same identity start a fresh
+   * window and produce additional output rows -- streaming cannot retract previously emitted
+   * results to match the batch range-scoped collapse. For a query that observes id=1 inserted at
+   * v1 and deleted at v3 with another commit at v2 in between, batch netChanges over [v1..v3]
+   * cancels to no row, while streaming emits an `insert` (when v2 advances the watermark past v1)
+   * followed later by a `delete` (when end-of-stream or another commit advances the watermark
+   * past v3).
+   *
+   * Because the streaming netChanges path uses `transformWithState`, the state store provider
+   * must be RocksDB. Set `spark.sql.streaming.stateStore.providerClass` to
+   * `org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider` before starting a
+   * streaming netChanges query; the default HDFS-backed provider is rejected at query start.
+   *
+   * When the requested options engage post-processing (carry-over removal, update detection, or
+   * netChanges), the rewrite injects an internal `EventTimeWatermark` on `_commit_timestamp` and
+   * a stateful streaming operator (an aggregate for the row-level passes, a `transformWithState`
+   * for netChanges). Two implications follow:
+   *   - A commit's events are emitted in the next micro-batch after the commit is read
+   *     (append-mode aggregate eviction is `eventTime &lt;= watermark`, and the watermark
+   *     advances to the max `_commit_timestamp` observed in the previous batch). A stream that
+   *     reads its last commit and stops will keep that commit's events in state until a
+   *     subsequent (no-data) micro-batch fires.
+   *   - The query is constrained to `Append` output mode; `Update` and `Complete` are rejected at
+   *     writer-start time with `STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION`. The internal
+   *     watermark metadata is stripped from the user-visible `_commit_timestamp` output, so
+   *     downstream user-supplied watermarks on other columns do not interact with it via the
+   *     global multi-watermark policy.
+   *
    * @param tableName
    *   a qualified or unqualified name that designates a table.
    * @since 4.2.0
@@ -316,18 +366,16 @@ abstract class DataStreamReader {
    *
    * @param sourceName
    *   the source name to validate
+   * @throws AnalysisException
+   *   if the source name contains invalid characters
    * @throws IllegalArgumentException
-   *   if the source name is null, empty, or contains invalid characters
+   *   if the source name is null or empty
    */
   private[sql] def validateSourceName(sourceName: String): Unit = {
-    require(sourceName != null, "Source name cannot be null")
-    require(sourceName.nonEmpty, "Source name cannot be empty")
-
-    val validNamePattern: Regex = "^[a-zA-Z0-9_]+$".r
-    if (!validNamePattern.pattern.matcher(sourceName).matches()) {
-      throw new AnalysisException(
+    StreamingNameValidator.validate(sourceName, "Source") { invalid =>
+      new AnalysisException(
         errorClass = "STREAMING_QUERY_EVOLUTION_ERROR.INVALID_SOURCE_NAME",
-        messageParameters = Map("sourceName" -> sourceName))
+        messageParameters = Map("sourceName" -> invalid))
     }
   }
 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index cb5ecc728c441..8f98466d1f17e 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -20,7 +20,7 @@ import java.util.concurrent.TimeoutException
 
 import org.apache.spark.annotation.Evolving
 import org.apache.spark.api.java.function.VoidFunction2
-import org.apache.spark.sql.{Dataset, ForeachWriter, WriteConfigMethods}
+import org.apache.spark.sql.{AnalysisException, Dataset, ForeachWriter, WriteConfigMethods}
 
 /**
  * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems,
@@ -90,6 +90,19 @@ abstract class DataStreamWriter[T] extends WriteConfigMethods[DataStreamWriter[T
    */
   def queryName(queryName: String): this.type
 
+  /**
+   * Assigns a name to this streaming sink for sink evolution capability. When sinks are named,
+   * they can be tracked in checkpoint metadata, enabling query evolution.
+   *
+   * If not specified, sinks are automatically assigned a default name based on their position in
+   * the query, which maintains backward compatibility.
+   *
+   * @param sinkName
+   *   the unique name for this sink (alphanumeric and underscore only)
+   * @since 4.1.0
+   */
+  private[sql] def name(sinkName: String): this.type
+
   /**
    * Specifies the underlying output data source.
    *
@@ -217,6 +230,24 @@ abstract class DataStreamWriter[T] extends WriteConfigMethods[DataStreamWriter[T
   @throws[TimeoutException]
   def toTable(tableName: String): StreamingQuery
 
+  /**
+   * Validates that a streaming sink name only contains alphanumeric characters and underscores.
+   *
+   * @param sinkName
+   *   the sink name to validate
+   * @throws AnalysisException
+   *   if the sink name contains invalid characters
+   * @throws IllegalArgumentException
+   *   if the sink name is null or empty
+   */
+  private[sql] def validateSinkName(sinkName: String): Unit = {
+    StreamingNameValidator.validate(sinkName, "Sink") { invalid =>
+      new AnalysisException(
+        errorClass = "STREAMING_QUERY_EVOLUTION_ERROR.INVALID_SINK_NAME",
+        messageParameters = Map("sinkName" -> invalid))
+    }
+  }
+
   ///////////////////////////////////////////////////////////////////////////////////////
   // Covariant Overrides
   ///////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/streaming/StreamingNameValidator.scala b/sql/api/src/main/scala/org/apache/spark/sql/streaming/StreamingNameValidator.scala
new file mode 100644
index 0000000000000..9b8700844310c
--- /dev/null
+++ b/sql/api/src/main/scala/org/apache/spark/sql/streaming/StreamingNameValidator.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.streaming
+
+import scala.util.matching.Regex
+
+import org.apache.spark.sql.AnalysisException
+
+/**
+ * Shared validation for user-assigned streaming source and sink names. Names must be non-null,
+ * non-empty, and contain only alphanumeric characters and underscores.
+ */
+private[sql] object StreamingNameValidator {
+  private val validNamePattern: Regex = "^[a-zA-Z0-9_]+$".r
+
+  /**
+   * Validates the given streaming entity name. Throws an `IllegalArgumentException` if the name
+   * is null or empty, and invokes `onInvalid` to build the `AnalysisException` to throw if the
+   * name does not match the allowed character set.
+   *
+   * @param name
+   *   the source/sink name to validate
+   * @param entityKind
+   *   a human-readable label (e.g. "Source", "Sink") used in null/empty messages
+   * @param onInvalid
+   *   builds the AnalysisException to throw when `name` has invalid characters
+   */
+  def validate(name: String, entityKind: String)(onInvalid: String => AnalysisException): Unit = {
+    require(name != null, s"$entityKind name cannot be null")
+    require(name.nonEmpty, s"$entityKind name cannot be empty")
+    if (!validNamePattern.pattern.matcher(name).matches()) {
+      throw onInvalid(name)
+    }
+  }
+}
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index d97e45dec9681..64fcb1fff6847 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/catalyst/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java b/sql/catalyst/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java
new file mode 100644
index 0000000000000..0ec73caca72ae
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/datasketches/memory/internal/ResourceImpl.java
@@ -0,0 +1,561 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.memory.internal;
+
+import static org.apache.datasketches.memory.internal.UnsafeUtil.unsafe;
+import static org.apache.datasketches.memory.internal.Util.characterPad;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.datasketches.memory.MemoryBoundsException;
+import org.apache.datasketches.memory.MemoryRequestServer;
+import org.apache.datasketches.memory.ReadOnlyException;
+import org.apache.datasketches.memory.Resource;
+
+// Ported from https://github.com/apache/datasketches-memory/pull/272
+// which relaxes the `checkJavaVersion` to allow Java 25
+
+/**
+ * Implements the root Resource methods plus some common static variables and check methods.
+ *
+ * @author Lee Rhodes
+ */
+@SuppressWarnings("restriction")
+public abstract class ResourceImpl implements Resource {
+  static final String JDK;
+  static final int JDK_MAJOR; //8, 11, 17, etc
+
+  //Used to convert "type" to bytes:  bytes = longs << LONG_SHIFT
+  static final int BOOLEAN_SHIFT    = 0;
+  static final int BYTE_SHIFT       = 0;
+  static final long SHORT_SHIFT     = 1;
+  static final long CHAR_SHIFT      = 1;
+  static final long INT_SHIFT       = 2;
+  static final long LONG_SHIFT      = 3;
+  static final long FLOAT_SHIFT     = 2;
+  static final long DOUBLE_SHIFT    = 3;
+
+  //class type IDs. Do not change the bit orders
+  //The lowest 3 bits are set dynamically
+  // 0000 0XXX Group 1
+  static final int WRITABLE  = 0; //bit 0 = 0
+  static final int READONLY  = 1; //bit 0
+  static final int REGION    = 2; //bit 1
+  static final int DUPLICATE = 4; //bit 2, for Buffer only
+
+  // 000X X000 Group 2
+  static final int HEAP   = 0;    //bits 3,4 = 0
+  static final int DIRECT = 8;    //bit 3
+  static final int MAP    = 16;   //bit 4, Map is effectively Direct
+
+  // 00X0 0000 Group 3 ByteOrder
+  static final int NATIVE_BO    = 0; //bit 5 = 0
+  static final int NONNATIVE_BO = 32;//bit 5
+
+  // 0X00 0000 Group 4
+  static final int MEMORY = 0;    //bit 6 = 0
+  static final int BUFFER = 64;   //bit 6
+
+  // X000 0000 Group 5
+  static final int BYTEBUF = 128; //bit 7
+
+  /**
+   * The java line separator character as a String.
+   */
+  public static final String LS = System.getProperty("line.separator");
+
+  static final String NOT_MAPPED_FILE_RESOURCE = "This is not a memory-mapped file resource";
+  static final String THREAD_EXCEPTION_TEXT = "Attempted access outside owning thread";
+
+  private static AtomicBoolean JAVA_VERSION_WARNING_PRINTED = new AtomicBoolean(false);
+
+  static {
+    final String jdkVer = System.getProperty("java.version");
+    final int[] p = parseJavaVersion(jdkVer);
+    JDK = p[0] + "." + p[1];
+    JDK_MAJOR = (p[0] == 1) ? p[1] : p[0];
+  }
+
+  //set by the leaf nodes
+  long capacityBytes;
+  long cumOffsetBytes;
+  long offsetBytes;
+  int typeId;
+  Thread owner = null;
+
+  /**
+   * The root of the Memory inheritance hierarchy
+   */
+  ResourceImpl() { }
+
+  //MemoryRequestServer logic
+
+  /**
+   * User specified MemoryRequestServer. Set here and by leaf nodes.
+   */
+  MemoryRequestServer memReqSvr = null;
+
+  @Override
+  public MemoryRequestServer getMemoryRequestServer() {
+    return memReqSvr;
+  }
+
+  @Override
+  public boolean hasMemoryRequestServer() {
+    return memReqSvr != null;
+  }
+
+  @Override
+  public void setMemoryRequestServer(final MemoryRequestServer memReqSvr) { this.memReqSvr = memReqSvr; }
+
+  //***
+
+  /**
+   * Check the requested offset and length against the allocated size.
+   * The invariants equation is: {@code 0 <= reqOff <= reqLen <= reqOff + reqLen <= allocSize}.
+   * If this equation is violated an {@link MemoryBoundsException} will be thrown.
+   * @param reqOff the requested offset
+   * @param reqLen the requested length
+   * @param allocSize the allocated size.
+   * @throws MemoryBoundsException if the given arguments constitute a violation
+   * of the invariants equation expressed above.
+   */
+  public static void checkBounds(final long reqOff, final long reqLen, final long allocSize) {
+    if ((reqOff | reqLen | (reqOff + reqLen) | (allocSize - (reqOff + reqLen))) < 0) {
+      throw new MemoryBoundsException(
+          "reqOffset: " + reqOff + ", reqLength: " + reqLen
+              + ", (reqOff + reqLen): " + (reqOff + reqLen) + ", allocSize: " + allocSize);
+    }
+  }
+
+  /**
+   * Checks the runtime Java Version string. Note that Java 17 and 21 is allowed only because some clients do not use the
+   * WritableMemory.allocateDirect(..) and related functions, which will not work with Java versions >= 14.
+   * The on-heap functions may work with 17 and 21, nonetheless, versions > Java 11 are not officially supported.
+   * Caveat emptor.
+   * @param jdkVer the <i>System.getProperty("java.version")</i> string of the form "p0.p1.X"
+   * @param p0 The first number group
+   * @param p1 The second number group
+   */
+  static void checkJavaVersion(final String jdkVer, final int p0, final int p1 ) {
+    final boolean ok = ((p0 == 1) && (p1 == 8)) || (p0 == 8) || (p0 == 11) || (p0 == 17 || (p0 == 21) || (p0 == 25));
+    if (!ok) { throw new IllegalArgumentException(
+        "Unsupported JDK Major Version. It must be one of 1.8, 8, 11, 17, 21, 25: " + jdkVer);
+    }
+    if (p0 > 11 && JAVA_VERSION_WARNING_PRINTED.compareAndSet(false, true)) {
+      System.err.println(
+          "Warning: Java versions > Java 11 can only operate in restricted mode where no off-heap operations are allowed!");
+    }
+  }
+
+  void checkNotReadOnly() {
+    if (isReadOnly()) {
+      throw new ReadOnlyException("Cannot write to a read-only Resource.");
+    }
+  }
+
+  /**
+   * This checks that the current thread is the same as the given owner thread.
+   * @Throws IllegalStateException if it is not.
+   * @param owner the given owner thread.
+   */
+  static final void checkThread(final Thread owner) {
+    if (owner != Thread.currentThread()) {
+      throw new IllegalStateException(THREAD_EXCEPTION_TEXT);
+    }
+  }
+
+  /**
+   * @throws IllegalStateException if this Resource is AutoCloseable, and already closed, i.e., not <em>alive</em>.
+   */
+  void checkValid() {
+    if (!isAlive()) {
+      throw new IllegalStateException("this Resource is AutoCloseable, and already closed, i.e., not <em>alive</em>.");
+    }
+  }
+
+  /**
+   * Checks that this resource is still valid and throws a MemoryInvalidException if it is not.
+   * Checks that the specified range of bytes is within bounds of this resource, throws
+   * {@link MemoryBoundsException} if it's not: i. e. if offsetBytes &lt; 0, or length &lt; 0,
+   * or offsetBytes + length &gt; {@link #getCapacity()}.
+   * @param offsetBytes the given offset in bytes of this object
+   * @param lengthBytes the given length in bytes of this object
+   * @throws IllegalStateException if this resource is AutoCloseable and is no longer valid, i.e.,
+   * it has already been closed.
+   * @throws MemoryBoundsException if this resource violates the memory bounds of this resource.
+   */
+  public final void checkValidAndBounds(final long offsetBytes, final long lengthBytes) {
+    checkValid();
+    checkBounds(offsetBytes, lengthBytes, getCapacity());
+  }
+
+  /**
+   * Checks that this resource is still valid and throws a MemoryInvalidException if it is not.
+   * Checks that the specified range of bytes is within bounds of this resource, throws
+   * {@link MemoryBoundsException} if it's not: i. e. if offsetBytes &lt; 0, or length &lt; 0,
+   * or offsetBytes + length &gt; {@link #getCapacity()}.
+   * Checks that this operation is a read-only operation and throws a ReadOnlyException if not.
+   * @param offsetBytes the given offset in bytes of this object
+   * @param lengthBytes the given length in bytes of this object
+   * @Throws MemoryInvalidException if this resource is AutoCloseable and is no longer valid, i.e.,
+   * it has already been closed.
+   * @Throws MemoryBoundsException if this resource violates the memory bounds of this resource.
+   * @Throws ReadOnlyException if the associated operation is not a Read-only operation.
+   */
+  final void checkValidAndBoundsForWrite(final long offsetBytes, final long lengthBytes) {
+    checkValid();
+    checkBounds(offsetBytes, lengthBytes, getCapacity());
+    if (isReadOnly()) {
+      throw new ReadOnlyException("Memory is read-only.");
+    }
+  }
+
+  @Override
+  public void close() {
+    /* Overridden by the leaf sub-classes that need AutoCloseable. */
+  }
+
+  @Override
+  public final boolean equalTo(final long thisOffsetBytes, final Resource that,
+      final long thatOffsetBytes, final long lengthBytes) {
+    if (that == null) { return false; }
+    return CompareAndCopy.equals(this, thisOffsetBytes, (ResourceImpl) that, thatOffsetBytes, lengthBytes);
+  }
+
+  @Override
+  public void force() { //overridden by Map Leaves
+    throw new UnsupportedOperationException(NOT_MAPPED_FILE_RESOURCE);
+  }
+
+  //Overridden by ByteBuffer Leaves. Used internally and for tests.
+  ByteBuffer getByteBuffer() {
+    return null;
+  }
+
+  @Override
+  public final ByteOrder getTypeByteOrder() {
+    return isNativeOrder(getTypeId()) ? Util.NATIVE_BYTE_ORDER : Util.NON_NATIVE_BYTE_ORDER;
+  }
+
+  @Override
+  public long getCapacity() {
+    checkValid();
+    return capacityBytes;
+  }
+
+  @Override
+  public long getCumulativeOffset(final long addOffsetBytes) {
+    return cumOffsetBytes + addOffsetBytes;
+  }
+
+  @Override
+  public long getRelativeOffset() {
+    return offsetBytes;
+  }
+
+  //Overridden by all leaves
+  int getTypeId() {
+    return typeId;
+  }
+
+  //Overridden by Heap and ByteBuffer leaves. Made public as getArray() in BaseWritableMemoryImpl and BaseWritableBufferImpl
+  Object getUnsafeObject() {
+    return null;
+  }
+
+  @Override
+  public boolean hasByteBuffer() {
+    return (getTypeId() & BYTEBUF) > 0;
+  }
+
+  @Override
+  public final boolean isByteOrderCompatible(final ByteOrder byteOrder) {
+    final ByteOrder typeBO = getTypeByteOrder();
+    return typeBO == ByteOrder.nativeOrder() && typeBO == byteOrder;
+  }
+
+  static final boolean isBuffer(final int typeId) {
+    return (typeId & BUFFER) > 0;
+  }
+
+  @Override
+  public boolean isCloseable() {
+    return (getTypeId() & (MAP | DIRECT)) > 0 && isAlive();
+  }
+
+  @Override
+  public final boolean isDirect() {
+    return getUnsafeObject() == null;
+  }
+
+  @Override
+  public boolean isDuplicate() {
+    return (getTypeId() & DUPLICATE) > 0;
+  }
+
+  @Override
+  public final boolean isHeap() {
+    checkValid();
+    return getUnsafeObject() != null;
+  }
+
+  @Override
+  public boolean isLoaded() { //overridden by Map Leaves
+    throw new IllegalStateException(NOT_MAPPED_FILE_RESOURCE);
+  }
+
+  @Override
+  public boolean isMapped() {
+    return (getTypeId() & MAP) > 0;
+  }
+
+  @Override
+  public boolean isMemory() {
+    return (getTypeId() & BUFFER) == 0;
+  }
+
+  static final boolean isNativeOrder(final int typeId) { //not used
+    return (typeId & NONNATIVE_BO) == 0;
+  }
+
+  @Override
+  public boolean isNonNativeOrder() {
+    return (getTypeId() & NONNATIVE_BO) > 0;
+  }
+
+  @Override
+  public final boolean isReadOnly() {
+    checkValid();
+    return (getTypeId() & READONLY) > 0;
+  }
+
+  @Override
+  public boolean isRegionView() {
+    return (getTypeId() & REGION) > 0;
+  }
+
+  @Override
+  public boolean isSameResource(final Resource that) {
+    checkValid();
+    if (that == null) { return false; }
+    final ResourceImpl that1 = (ResourceImpl) that;
+    that1.checkValid();
+    if (this == that1) { return true; }
+    return getCumulativeOffset(0) == that1.getCumulativeOffset(0)
+            && getCapacity() == that1.getCapacity()
+            && getUnsafeObject() == that1.getUnsafeObject()
+            && getByteBuffer() == that1.getByteBuffer();
+  }
+
+  //Overridden by Direct and Map leaves
+  @Override
+  public boolean isAlive() {
+    return true;
+  }
+
+  @Override
+  public void load() { //overridden by Map leaves
+    throw new IllegalStateException(NOT_MAPPED_FILE_RESOURCE);
+  }
+
+  private static String pad(final String s, final int fieldLen) {
+    return characterPad(s, fieldLen, ' ' , true);
+  }
+
+  /**
+   * Returns first two number groups of the java version string.
+   * @param jdkVer the java version string from System.getProperty("java.version").
+   * @return first two number groups of the java version string.
+   * @throws IllegalArgumentException for an improper Java version string.
+   */
+  static int[] parseJavaVersion(final String jdkVer) {
+    final int p0, p1;
+    try {
+      String[] parts = jdkVer.trim().split("^0-9\\.");//grab only number groups and "."
+      parts = parts[0].split("\\."); //split out the number groups
+      p0 = Integer.parseInt(parts[0]); //the first number group
+      p1 = (parts.length > 1) ? Integer.parseInt(parts[1]) : 0; //2nd number group, or 0
+    } catch (final NumberFormatException | ArrayIndexOutOfBoundsException  e) {
+      throw new IllegalArgumentException("Improper Java -version string: " + jdkVer + LS + e);
+    }
+    checkJavaVersion(jdkVer, p0, p1);
+    return new int[] {p0, p1};
+  }
+
+  //REACHABILITY FENCE
+  static void reachabilityFence(final Object obj) { }
+
+  final static int removeNnBuf(final int typeId) { return typeId & ~NONNATIVE_BO & ~BUFFER; }
+
+  final static int setReadOnlyBit(final int typeId, final boolean readOnly) {
+    return readOnly ? typeId | READONLY : typeId & ~READONLY;
+  }
+
+  /**
+   * Returns a formatted hex string of an area of this object.
+   * Used primarily for testing.
+   * @param state the ResourceImpl
+   * @param preamble a descriptive header
+   * @param offsetBytes offset bytes relative to the MemoryImpl start
+   * @param lengthBytes number of bytes to convert to a hex string
+   * @return a formatted hex string in a human readable array
+   */
+  static final String toHex(final ResourceImpl state, final String preamble, final long offsetBytes, final int lengthBytes,
+      final boolean withData) {
+    final long capacity = state.getCapacity();
+    ResourceImpl.checkBounds(offsetBytes, lengthBytes, capacity);
+    final StringBuilder sb = new StringBuilder();
+    final Object uObj = state.getUnsafeObject();
+    final String uObjStr;
+    final long uObjHeader;
+    if (uObj == null) {
+      uObjStr = "null";
+      uObjHeader = 0;
+    } else {
+      uObjStr =  uObj.getClass().getSimpleName() + ", " + (uObj.hashCode() & 0XFFFFFFFFL);
+      uObjHeader = UnsafeUtil.getArrayBaseOffset(uObj.getClass());
+    }
+    final ByteBuffer bb = state.getByteBuffer();
+    final String bbStr = bb == null ? "null"
+            : bb.getClass().getSimpleName() + ", " + (bb.hashCode() & 0XFFFFFFFFL);
+    final MemoryRequestServer memReqSvr = state.getMemoryRequestServer();
+    final String memReqStr = memReqSvr != null
+        ? memReqSvr.getClass().getSimpleName() + ", " + (memReqSvr.hashCode() & 0XFFFFFFFFL)
+        : "null";
+    final long cumBaseOffset = state.getCumulativeOffset(0);
+    sb.append(preamble).append(LS);
+    sb.append("UnsafeObj, hashCode : ").append(uObjStr).append(LS);
+    sb.append("UnsafeObjHeader     : ").append(uObjHeader).append(LS);
+    sb.append("ByteBuf, hashCode   : ").append(bbStr).append(LS);
+    sb.append("RegionOffset        : ").append(state.getRelativeOffset()).append(LS);
+    if (ResourceImpl.isBuffer(state.typeId)) {
+      sb.append("Start               : ").append(((PositionalImpl)state).getStart()).append(LS);
+      sb.append("Position            : ").append(((PositionalImpl)state).getPosition()).append(LS);
+      sb.append("End                 : ").append(((PositionalImpl)state).getEnd()).append(LS);
+    }
+    sb.append("Capacity            : ").append(capacity).append(LS);
+    sb.append("CumBaseOffset       : ").append(cumBaseOffset).append(LS);
+    sb.append("MemReqSvr, hashCode : ").append(memReqStr).append(LS);
+    sb.append("is Alive            : ").append(state.isAlive()).append(LS);
+    sb.append("Read Only           : ").append(state.isReadOnly()).append(LS);
+    sb.append("Type Byte Order     : ").append(state.getTypeByteOrder().toString()).append(LS);
+    sb.append("Native Byte Order   : ").append(ByteOrder.nativeOrder().toString()).append(LS);
+    sb.append("JDK Runtime Version : ").append(JDK).append(LS);
+    //Data detail
+    if (withData) {
+      sb.append("Data, bytes         :  0  1  2  3  4  5  6  7");
+
+      for (long i = 0; i < lengthBytes; i++) {
+        final int b = unsafe.getByte(uObj, cumBaseOffset + offsetBytes + i) & 0XFF;
+        if (i % 8 == 0) { //row header
+          sb.append(String.format("%n%20s: ", offsetBytes + i));
+        }
+        sb.append(String.format("%02x ", b));
+      }
+      sb.append(LS);
+    }
+    sb.append("### END SUMMARY");
+    return sb.toString();
+  }
+
+  @Override
+  public final String toString(final String header, final long offsetBytes, final int lengthBytes,
+      final boolean withData) {
+    checkValid();
+    final String klass = this.getClass().getSimpleName();
+    final String s1 = String.format("(..., %d, %d)", offsetBytes, lengthBytes);
+    final long hcode = hashCode() & 0XFFFFFFFFL;
+    final String call = ".toHexString" + s1 + ", hashCode: " + hcode;
+    final StringBuilder sb = new StringBuilder();
+    sb.append("### ").append(klass).append(" SUMMARY ###").append(LS);
+    sb.append("Type Info           : ").append(typeDecode(typeId)).append(LS + LS);
+    sb.append("Header Comment      : ").append(header).append(LS);
+    sb.append("Call Parameters     : ").append(call);
+    return toHex(this, sb.toString(), offsetBytes, lengthBytes, withData);
+  }
+
+  @Override
+  public final String toString() {
+    return toString("", 0, (int)this.getCapacity(), false);
+  }
+
+  /**
+   * Decodes the resource type. This is primarily for debugging.
+   * @param typeId the given typeId
+   * @return a human readable string.
+   */
+  static final String typeDecode(final int typeId) {
+    final StringBuilder sb = new StringBuilder();
+    final int group1 = typeId & 0x7;
+    switch (group1) { // 0000 0XXX
+      case 0 : sb.append(pad("Writable + ",32)); break;
+      case 1 : sb.append(pad("ReadOnly + ",32)); break;
+      case 2 : sb.append(pad("Writable + Region + ",32)); break;
+      case 3 : sb.append(pad("ReadOnly + Region + ",32)); break;
+      case 4 : sb.append(pad("Writable + Duplicate + ",32)); break;
+      case 5 : sb.append(pad("ReadOnly + Duplicate + ",32)); break;
+      case 6 : sb.append(pad("Writable + Region + Duplicate + ",32)); break;
+      case 7 : sb.append(pad("ReadOnly + Region + Duplicate + ",32)); break;
+      default: break;
+    }
+    final int group2 = (typeId >>> 3) & 0x3;
+    switch (group2) { // 000X X000
+      case 0 : sb.append(pad("Heap + ",15)); break;
+      case 1 : sb.append(pad("Direct + ",15)); break;
+      case 2 : sb.append(pad("Map + Direct + ",15)); break;
+      case 3 : sb.append(pad("Map + Direct + ",15)); break;
+      default: break;
+    }
+    final int group3 = (typeId >>> 5) & 0x1;
+    switch (group3) { // 00X0 0000
+      case 0 : sb.append(pad("NativeOrder + ",17)); break;
+      case 1 : sb.append(pad("NonNativeOrder + ",17)); break;
+      default: break;
+    }
+    final int group4 = (typeId >>> 6) & 0x1;
+    switch (group4) { // 0X00 0000
+      case 0 : sb.append(pad("Memory + ",9)); break;
+      case 1 : sb.append(pad("Buffer + ",9)); break;
+      default: break;
+    }
+    final int group5 = (typeId >>> 7) & 0x1;
+    switch (group5) { // X000 0000
+      case 0 : sb.append(pad("",10)); break;
+      case 1 : sb.append(pad("ByteBuffer",10)); break;
+      default: break;
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public final long xxHash64(final long offsetBytes, final long lengthBytes, final long seed) {
+    checkValid();
+    return XxHash64.hash(getUnsafeObject(), getCumulativeOffset(0) + offsetBytes, lengthBytes, seed);
+  }
+
+  @Override
+  public final long xxHash64(final long in, final long seed) {
+    return XxHash64.hash(in, seed);
+  }
+
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGetters.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGetters.java
index 2a3a6884c3c6e..c22b7088a3da3 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGetters.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGetters.java
@@ -22,11 +22,10 @@
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.Decimal;
 import org.apache.spark.sql.catalyst.util.MapData;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
 
 public interface SpecializedGetters {
 
@@ -52,9 +51,14 @@ public interface SpecializedGetters {
 
   byte[] getBinary(int ordinal);
 
-  GeographyVal getGeography(int ordinal);
-
-  GeometryVal getGeometry(int ordinal);
+  /**
+   * Returns the opaque-bytes physical value at {@code ordinal} as a {@link BinaryView}. Used by
+   * logical types whose physical representation is "an opaque chunk of bytes" - currently
+   * GEOMETRY and GEOGRAPHY. Returns {@code null} if the slot is null. The returned view may
+   * alias the underlying buffer; callers that need to retain it past the source's lifetime
+   * must call {@link BinaryView#copy()}.
+   */
+  BinaryView getBinaryView(int ordinal);
 
   CalendarInterval getInterval(int ordinal);
 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGettersReader.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGettersReader.java
index 830aa0d0d0fb4..e86e4ac40be16 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGettersReader.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/SpecializedGettersReader.java
@@ -60,11 +60,8 @@ public static Object read(
     if (physicalDataType instanceof PhysicalStringType) {
       return obj.getUTF8String(ordinal);
     }
-    if (physicalDataType instanceof PhysicalGeographyType) {
-      return obj.getGeography(ordinal);
-    }
-    if (physicalDataType instanceof PhysicalGeometryType) {
-      return obj.getGeometry(ordinal);
+    if (physicalDataType instanceof PhysicalBinaryViewType) {
+      return obj.getBinaryView(ordinal);
     }
     if (physicalDataType instanceof PhysicalDecimalType dt) {
       return obj.getDecimal(ordinal, dt.precision(), dt.scale());
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
index 09ac634955fcb..e0407a61267d1 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java
@@ -40,8 +40,7 @@
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 
 import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET;
 
@@ -225,15 +224,12 @@ public byte[] getBinary(int ordinal) {
   }
 
   @Override
-  public GeographyVal getGeography(int ordinal) {
-    byte[] bytes = getBinary(ordinal);
-    return (bytes == null) ? null : GeographyVal.fromBytes(bytes);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int ordinal) {
-    byte[] bytes = getBinary(ordinal);
-    return (bytes == null) ? null : GeometryVal.fromBytes(bytes);
+  public BinaryView getBinaryView(int ordinal) {
+    if (isNullAt(ordinal)) return null;
+    final long offsetAndSize = getLong(ordinal);
+    final int offset = (int) (offsetAndSize >> 32);
+    final int size = (int) offsetAndSize;
+    return BinaryView.fromAddress(baseObject, baseOffset + offset, size);
   }
 
   @Override
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index ff9eeea9bf126..6633ff8cbe0f5 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -37,11 +37,10 @@
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.bitset.BitSetMethods;
 import org.apache.spark.unsafe.hash.Murmur3_x86_32;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
 
 import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET;
 
@@ -420,15 +419,12 @@ public byte[] getBinary(int ordinal) {
   }
 
   @Override
-  public GeographyVal getGeography(int ordinal) {
-    byte[] bytes = getBinary(ordinal);
-    return (bytes == null) ? null : GeographyVal.fromBytes(bytes);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int ordinal) {
-    byte[] bytes = getBinary(ordinal);
-    return (bytes == null) ? null : GeometryVal.fromBytes(bytes);
+  public BinaryView getBinaryView(int ordinal) {
+    if (isNullAt(ordinal)) return null;
+    final long offsetAndSize = getLong(ordinal);
+    final int offset = (int) (offsetAndSize >> 32);
+    final int size = (int) offsetAndSize;
+    return BinaryView.fromAddress(baseObject, baseOffset + offset, size);
   }
 
   @Override
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java
index e2abc108bb1bc..4b0ca0521ae1b 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java
@@ -24,8 +24,7 @@
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.bitset.BitSetMethods;
 import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
 
@@ -113,12 +112,8 @@ public final void write(int ordinal, UTF8String input) {
     writeUnalignedBytes(ordinal, input.getBaseObject(), input.getBaseOffset(), input.numBytes());
   }
 
-  public final void write(int ordinal, GeographyVal input) {
-    write(ordinal, input.getBytes());
-  }
-
-  public final void write(int ordinal, GeometryVal input) {
-    write(ordinal, input.getBytes());
+  public final void write(int ordinal, BinaryView input) {
+    writeUnalignedBytes(ordinal, input.getBaseObject(), input.getBaseOffset(), input.numBytes());
   }
 
   public final void write(int ordinal, byte[] input) {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geo.java
index bf723a8efef91..1002cdd28a775 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geo.java
@@ -58,7 +58,6 @@ interface Geo {
   /** Binary converters. */
 
   // Returns the Well-Known Binary (WKB) representation of the geo object.
-  byte[] toWkb();
   byte[] toWkb(ByteOrder endianness);
 
   // Returns the Extended Well-Known Binary (EWKB) representation of the geo object.
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
index f446d71d5a5b8..21d07a54a1b7c 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
@@ -21,7 +21,7 @@
 import org.apache.spark.sql.catalyst.util.geo.WkbReader;
 import org.apache.spark.sql.catalyst.util.geo.WkbWriter;
 import org.apache.spark.sql.errors.QueryExecutionErrors;
-import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.unsafe.types.BinaryView;
 
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
@@ -33,8 +33,8 @@ public final class Geography implements Geo {
 
   /** Geography internal implementation. */
 
-  // The Geography type is implemented as an array of bytes stored inside a `GeographyVal` object.
-  protected final GeographyVal value;
+  // The GEOGRAPHY physical value is an opaque chunk of bytes, carried as a {@link BinaryView}.
+  protected final BinaryView value;
 
   /** Geography constants. */
 
@@ -45,26 +45,26 @@ public final class Geography implements Geo {
 
   // We make the constructors private. Use `fromBytes` or `fromValue` to create new instances.
   private Geography(byte[] bytes) {
-    this.value = GeographyVal.fromBytes(bytes);
+    this.value = BinaryView.fromBytes(bytes);
   }
 
-  private Geography(GeographyVal value) {
+  private Geography(BinaryView value) {
     this.value = value;
   }
 
-  // Factory methods to create new Geography instances from a byte array or a `GeographyVal`.
+  // Factory methods to create new Geography instances from a byte array or a {@link BinaryView}.
   public static Geography fromBytes(byte[] bytes) {
     return new Geography(bytes);
   }
 
-  public static Geography fromValue(GeographyVal value) {
+  public static Geography fromValue(BinaryView value) {
     return new Geography(value);
   }
 
   /** Geography getters and instance methods. */
 
-  // Returns the underlying physical type value of this Geography instance.
-  public GeographyVal getValue() {
+  // Returns the underlying physical-type value of this Geography instance.
+  public BinaryView getValue() {
     return value;
   }
 
@@ -73,10 +73,11 @@ public byte[] getBytes() {
     return value.getBytes();
   }
 
-  // Returns a copy of this geography.
+  // Returns a copy of this geography that owns its own backing buffer. Required before
+  // calling mutating methods like {@link #setSrid(int)} on a value that was read directly
+  // from an UnsafeRow / ColumnVector buffer.
   public Geography copy() {
-    byte[] bytes = getBytes();
-    return Geography.fromBytes(Arrays.copyOf(bytes, bytes.length));
+    return new Geography(value.copy());
   }
 
   /** Geography WKB parsing. */
@@ -129,11 +130,6 @@ public static Geography fromEwkt(byte[] ewkt) {
 
   /** Geography binary standard format converters: WKB and EWKB. */
 
-  @Override
-  public byte[] toWkb() {
-    return toWkbInternal(DEFAULT_ENDIANNESS);
-  }
-
   @Override
   public byte[] toWkb(ByteOrder endianness) {
     return toWkbInternal(endianness);
@@ -188,6 +184,13 @@ public int srid() {
   @Override
   public void setSrid(int srid) {
     // This method sets the SRID value in the in-memory Geography representation header.
+    // It mutates the backing buffer in place, which only writes through when this value owns
+    // a tight, on-heap array. For a sliced / sub-range / off-heap view, getBytes() (and hence
+    // getWrapper()) returns a throwaway copy and the write would be silently lost, so fail
+    // loudly and direct the caller to copy() first.
+    if (!value.hasTightOnHeapArray()) {
+      throw QueryExecutionErrors.cannotMutateReadOnlyGeoValueError();
+    }
     getWrapper().putInt(SRID_OFFSET, srid);
   }
 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
index 58be589b86ff7..973abd3e7de0c 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
@@ -21,7 +21,7 @@
 import org.apache.spark.sql.catalyst.util.geo.WkbReader;
 import org.apache.spark.sql.catalyst.util.geo.WkbWriter;
 import org.apache.spark.sql.errors.QueryExecutionErrors;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
@@ -33,8 +33,8 @@ public final class Geometry implements Geo {
 
   /** Geometry internal implementation. */
 
-  // The Geometry type is implemented as an array of bytes stored inside a `GeometryVal` object.
-  protected final GeometryVal value;
+  // The GEOMETRY physical value is an opaque chunk of bytes, carried as a {@link BinaryView}.
+  protected final BinaryView value;
 
   /** Geometry constants. */
 
@@ -45,26 +45,26 @@ public final class Geometry implements Geo {
 
   // We make the constructors private. Use `fromBytes` or `fromValue` to create new instances.
   private Geometry(byte[] bytes) {
-    this.value = GeometryVal.fromBytes(bytes);
+    this.value = BinaryView.fromBytes(bytes);
   }
 
-  private Geometry(GeometryVal value) {
+  private Geometry(BinaryView value) {
     this.value = value;
   }
 
-  // Factory methods to create new Geometry instances from a byte array or a `GeometryVal`.
+  // Factory methods to create new Geometry instances from a byte array or a {@link BinaryView}.
   public static Geometry fromBytes(byte[] bytes) {
     return new Geometry(bytes);
   }
 
-  public static Geometry fromValue(GeometryVal value) {
+  public static Geometry fromValue(BinaryView value) {
     return new Geometry(value);
   }
 
   /** Geometry getters and instance methods. */
 
-  // Returns the underlying physical type value of this Geometry instance.
-  public GeometryVal getValue() {
+  // Returns the underlying physical-type value of this Geometry instance.
+  public BinaryView getValue() {
     return value;
   }
 
@@ -73,10 +73,11 @@ public byte[] getBytes() {
     return value.getBytes();
   }
 
-  // Returns a copy of this geometry.
+  // Returns a copy of this geometry that owns its own backing buffer. Required before
+  // calling mutating methods like {@link #setSrid(int)} on a value that was read directly
+  // from an UnsafeRow / ColumnVector buffer.
   public Geometry copy() {
-    byte[] bytes = getBytes();
-    return Geometry.fromBytes(Arrays.copyOf(bytes, bytes.length));
+    return new Geometry(value.copy());
   }
 
   /** Geometry WKB parsing. */
@@ -129,11 +130,6 @@ public static Geometry fromEwkt(byte[] ewkt) {
 
   /** Geometry binary standard format converters: WKB and EWKB. */
 
-  @Override
-  public byte[] toWkb() {
-    return toWkbInternal(DEFAULT_ENDIANNESS);
-  }
-
   @Override
   public byte[] toWkb(ByteOrder endianness) {
     return toWkbInternal(endianness);
@@ -191,6 +187,13 @@ public int srid() {
   @Override
   public void setSrid(int srid) {
     // This method sets the SRID value in the in-memory Geometry representation header.
+    // It mutates the backing buffer in place, which only writes through when this value owns
+    // a tight, on-heap array. For a sliced / sub-range / off-heap view, getBytes() (and hence
+    // getWrapper()) returns a throwaway copy and the write would be silently lost, so fail
+    // loudly and direct the caller to copy() first.
+    if (!value.hasTightOnHeapArray()) {
+      throw QueryExecutionErrors.cannotMutateReadOnlyGeoValueError();
+    }
     getWrapper().putInt(SRID_OFFSET, srid);
   }
 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
index 4026bbb2c22df..d1ce3293fb8fd 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -21,49 +21,54 @@
 import org.apache.spark.sql.errors.QueryExecutionErrors;
 import org.apache.spark.sql.types.GeographyType;
 import org.apache.spark.sql.types.GeometryType;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.UTF8String;
 
+import java.nio.ByteOrder;
+
 // This class defines static methods that used to implement ST expressions using `StaticInvoke`.
 public final class STUtils {
 
   /** Conversion methods from physical values to Geography/Geometry objects. */
 
-  // Converts a GEOGRAPHY from its physical value to the corresponding `Geography` object
-  static Geography fromPhysVal(GeographyVal value) {
+  // Converts a GEOGRAPHY physical value to the corresponding `Geography` object. The returned
+  // Geography may share its backing array with {@code value} (see {@link BinaryView#getBytes}),
+  // so callers that need to mutate it (e.g. via `setSrid`) must first call `Geography.copy()`.
+  static Geography geogFromPhysVal(BinaryView value) {
     return Geography.fromBytes(value.getBytes());
   }
 
-  // Converts a GEOMETRY from its physical value to the corresponding `Geometry` object
-  static Geometry fromPhysVal(GeometryVal value) {
+  // Converts a GEOMETRY physical value to the corresponding `Geometry` object. The returned
+  // Geometry may share its backing array with {@code value} (see {@link BinaryView#getBytes}),
+  // so callers that need to mutate it (e.g. via `setSrid`) must first call `Geometry.copy()`.
+  static Geometry geomFromPhysVal(BinaryView value) {
     return Geometry.fromBytes(value.getBytes());
   }
 
   /** Conversion methods from Geography/Geometry objects to physical values. */
 
-  // Converts a `Geography` object to the corresponding GEOGRAPHY physical value.
-  static GeographyVal toPhysVal(Geography g) {
+  // Converts a `Geography` object to its physical GEOGRAPHY value.
+  static BinaryView toPhysVal(Geography g) {
     return g.getValue();
   }
 
-  // Converts a `Geometry` object to the corresponding GEOMETRY physical value.
-  static GeometryVal toPhysVal(Geometry g) {
+  // Converts a `Geometry` object to its physical GEOMETRY value.
+  static BinaryView toPhysVal(Geometry g) {
     return g.getValue();
   }
 
   /** Geospatial type casting utility methods. */
 
   // Cast geometry to geography.
-  public static GeographyVal geometryToGeography(GeometryVal geometryVal) {
+  public static BinaryView geometryToGeography(BinaryView geometryVal) {
     // We first need to check whether the input geometry has a geographic SRID.
-    int srid = stSrid(geometryVal);
+    int srid = stGeomSrid(geometryVal);
     if(!GeographyType.isSridSupported(srid)) {
       throw QueryExecutionErrors.stInvalidSridValueError(String.valueOf(srid));
     }
     // We also need to check whether the input geometry has coordinates in geography bounds.
     try {
-      byte[] wkb = stAsBinary(geometryVal);
+      byte[] wkb = stGeomAsBinary(geometryVal);
       new WkbReader(true).read(wkb, srid);
     } catch (WkbParseException e) {
       throw QueryExecutionErrors.wkbParseError(e.getParseError(), e.getPosition());
@@ -72,7 +77,7 @@ public static GeographyVal geometryToGeography(GeometryVal geometryVal) {
   }
 
   // Cast geography to geometry.
-  public static GeometryVal geographyToGeometry(GeographyVal geographyVal) {
+  public static BinaryView geographyToGeometry(BinaryView geographyVal) {
     // Geographic SRID is always a valid SRID for geometry, so we don't need to check it.
     // Also, all geographic coordinates are valid for geometry, so no need to check bounds.
     return toPhysVal(Geometry.fromBytes(geographyVal.getBytes()));
@@ -80,14 +85,14 @@ public static GeometryVal geographyToGeometry(GeographyVal geographyVal) {
 
   /** Geospatial type encoder/decoder utilities. */
 
-  public static GeometryVal serializeGeomFromWKB(org.apache.spark.sql.types.Geometry geometry,
+  public static BinaryView serializeGeomFromWKB(org.apache.spark.sql.types.Geometry geometry,
       GeometryType gt) {
     int geometrySrid = geometry.getSrid();
     gt.assertSridAllowedForType(geometrySrid);
     return toPhysVal(Geometry.fromWkb(geometry.getBytes(), geometrySrid));
   }
 
-  public static GeographyVal serializeGeogFromWKB(org.apache.spark.sql.types.Geography geography,
+  public static BinaryView serializeGeogFromWKB(org.apache.spark.sql.types.Geography geography,
       GeographyType gt) {
     int geographySrid = geography.getSrid();
     gt.assertSridAllowedForType(geographySrid);
@@ -95,52 +100,83 @@ public static GeographyVal serializeGeogFromWKB(org.apache.spark.sql.types.Geogr
   }
 
   public static org.apache.spark.sql.types.Geometry deserializeGeom(
-      GeometryVal geometry, GeometryType gt) {
-    int geometrySrid = stSrid(geometry);
+      BinaryView geometry, GeometryType gt) {
+    int geometrySrid = stGeomSrid(geometry);
     gt.assertSridAllowedForType(geometrySrid);
-    byte[] wkb = stAsBinary(geometry);
+    byte[] wkb = stGeomAsBinary(geometry);
     return org.apache.spark.sql.types.Geometry.fromWKB(wkb, geometrySrid);
   }
 
   public static org.apache.spark.sql.types.Geography deserializeGeog(
-      GeographyVal geography, GeographyType gt) {
-    int geographySrid = stSrid(geography);
+      BinaryView geography, GeographyType gt) {
+    int geographySrid = stGeogSrid(geography);
     gt.assertSridAllowedForType(geographySrid);
-    byte[] wkb = stAsBinary(geography);
+    byte[] wkb = stGeogAsBinary(geography);
     return org.apache.spark.sql.types.Geography.fromWKB(wkb, geographySrid);
   }
 
-  /** Methods for implementing ST expressions. */
+  /** Methods for implementing ST expressions.
+   *
+   * The ST_AsBinary, ST_AsEWKT, ST_Srid, and ST_SetSrid expression families each have a
+   * {@code stGeog*} and a {@code stGeom*} variant. The variants exist for
+   * {@link org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke} dispatch only:
+   * the GEOMETRY/GEOGRAPHY physical type is the same opaque byte chunk, but the WKB
+   * validation rules and SRID checks differ, so the ST expressions pick the variant from
+   * the input's logical {@link org.apache.spark.sql.types.DataType}.
+   */
+
+  private static ByteOrder parseEndianness(UTF8String endianness) {
+    String endiannessString = endianness.toString();
+    if (endiannessString.equalsIgnoreCase("NDR")) return ByteOrder.LITTLE_ENDIAN;
+    if (endiannessString.equalsIgnoreCase("XDR")) return ByteOrder.BIG_ENDIAN;
+    throw QueryExecutionErrors.stInvalidArgumentErrorInvalidEndiannessValue(endiannessString);
+  }
 
   // ST_AsBinary
-  public static byte[] stAsBinary(GeographyVal geo) {
-    return fromPhysVal(geo).toWkb();
+  public static byte[] stGeogAsBinary(BinaryView geo) {
+    return geogFromPhysVal(geo).toWkb(ByteOrder.LITTLE_ENDIAN);
+  }
+
+  public static byte[] stGeogAsBinary(BinaryView geo, UTF8String endianness) {
+    return geogFromPhysVal(geo).toWkb(parseEndianness(endianness));
+  }
+
+  public static byte[] stGeomAsBinary(BinaryView geo) {
+    return geomFromPhysVal(geo).toWkb(ByteOrder.LITTLE_ENDIAN);
   }
 
-  public static byte[] stAsBinary(GeometryVal geo) {
-    return fromPhysVal(geo).toWkb();
+  public static byte[] stGeomAsBinary(BinaryView geo, UTF8String endianness) {
+    return geomFromPhysVal(geo).toWkb(parseEndianness(endianness));
   }
 
   // ST_AsEWKT
-  public static UTF8String stAsEwkt(GeographyVal geo) {
-    return UTF8String.fromBytes(fromPhysVal(geo).toEwkt());
+  public static UTF8String stGeogAsEwkt(BinaryView geo) {
+    return UTF8String.fromBytes(geogFromPhysVal(geo).toEwkt());
   }
 
-  public static UTF8String stAsEwkt(GeometryVal geo) {
-    return UTF8String.fromBytes(fromPhysVal(geo).toEwkt());
+  public static UTF8String stGeomAsEwkt(BinaryView geo) {
+    return UTF8String.fromBytes(geomFromPhysVal(geo).toEwkt());
   }
 
   // ST_GeogFromWKB
-  public static GeographyVal stGeogFromWKB(byte[] wkb) {
+  public static BinaryView stGeogFromWKB(byte[] wkb) {
     return toPhysVal(Geography.fromWkb(wkb));
   }
 
+  public static BinaryView stGeogFromWKB(byte[] wkb, int srid) {
+    // We only allow setting the SRID to geographic values.
+    if(!GeographyType.isSridSupported(srid)) {
+      throw QueryExecutionErrors.stInvalidSridValueError(srid);
+    }
+    return toPhysVal(Geography.fromWkb(wkb, srid));
+  }
+
   // ST_GeomFromWKB
-  public static GeometryVal stGeomFromWKB(byte[] wkb) {
+  public static BinaryView stGeomFromWKB(byte[] wkb) {
     return toPhysVal(Geometry.fromWkb(wkb));
   }
 
-  public static GeometryVal stGeomFromWKB(byte[] wkb, int srid) {
+  public static BinaryView stGeomFromWKB(byte[] wkb, int srid) {
     // We only allow setting the SRID to valid values.
     if(!GeometryType.isSridSupported(srid)) {
       throw QueryExecutionErrors.stInvalidSridValueError(srid);
@@ -149,37 +185,37 @@ public static GeometryVal stGeomFromWKB(byte[] wkb, int srid) {
   }
 
   // ST_SetSrid
-  public static GeographyVal stSetSrid(GeographyVal geo, int srid) {
+  public static BinaryView stGeogSetSrid(BinaryView geo, int srid) {
     // We only allow setting the SRID to geographic values.
     if(!GeographyType.isSridSupported(srid)) {
       throw QueryExecutionErrors.stInvalidSridValueError(srid);
     }
     // Create a copy of the input geography.
-    Geography copy = fromPhysVal(geo).copy();
+    Geography copy = geogFromPhysVal(geo).copy();
     // Set the SRID of the copy to the specified value.
     copy.setSrid(srid);
     return toPhysVal(copy);
   }
 
-  public static GeometryVal stSetSrid(GeometryVal geo, int srid) {
+  public static BinaryView stGeomSetSrid(BinaryView geo, int srid) {
     // We only allow setting the SRID to valid values.
     if(!GeometryType.isSridSupported(srid)) {
       throw QueryExecutionErrors.stInvalidSridValueError(srid);
     }
     // Create a copy of the input geometry.
-    Geometry copy = fromPhysVal(geo).copy();
+    Geometry copy = geomFromPhysVal(geo).copy();
     // Set the SRID of the copy to the specified value.
     copy.setSrid(srid);
     return toPhysVal(copy);
   }
 
   // ST_Srid
-  public static int stSrid(GeographyVal geog) {
-    return fromPhysVal(geog).srid();
+  public static int stGeogSrid(BinaryView geog) {
+    return geogFromPhysVal(geog).srid();
   }
 
-  public static int stSrid(GeometryVal geom) {
-    return fromPhysVal(geom).srid();
+  public static int stGeomSrid(BinaryView geom) {
+    return geomFromPhysVal(geom).srid();
   }
 
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java
index 23f3acc7230fa..20586f57bcfdd 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java
@@ -28,9 +28,10 @@
  * views, and functions.
  * <p>
  * Catalog implementations must implement this marker interface to be loaded by
- * {@link Catalogs#load(String, SQLConf)}. The loader will instantiate catalog classes using the
+ * {@link org.apache.spark.sql.connector.catalog.Catalogs#load(String,SQLConf)}.
+ * The loader will instantiate catalog classes using the
  * required public no-arg constructor. After creating an instance, it will be configured by calling
- * {@link #initialize(String, CaseInsensitiveStringMap)}.
+ * {@link #initialize(String,CaseInsensitiveStringMap)}.
  * <p>
  * Catalog implementations are registered to a name by adding a configuration option to Spark:
  * {@code spark.sql.catalog.catalog-name=com.example.YourCatalogClass}. All configuration properties
@@ -56,8 +57,8 @@ public interface CatalogPlugin {
   /**
    * Called to get this catalog's name.
    * <p>
-   * This method is only called after {@link #initialize(String, CaseInsensitiveStringMap)} is
-   * called to pass the catalog's name.
+   * This method is only called after
+   * {@link #initialize(String,CaseInsensitiveStringMap)} is called to pass the catalog's name.
    */
   String name();
 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Changelog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Changelog.java
index 0a811aa0ae4d7..2c1dc896c1ba6 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Changelog.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Changelog.java
@@ -33,16 +33,92 @@
  * <ul>
  *   <li>{@code _change_type} (STRING) — the kind of change: {@code insert}, {@code delete},
  *       {@code update_preimage}, or {@code update_postimage}</li>
- *   <li>{@code _commit_version} (connector-defined type, e.g. LONG) — the version containing
- *       this change</li>
- *   <li>{@code _commit_timestamp} (TIMESTAMP) — the timestamp of the commit</li>
+ *   <li>{@code _commit_version} — the commit version containing this change. Must be
+ *       either {@code LongType} or {@code StringType}; all other types are rejected.
+ *       The column's natural ordering (numeric for {@code LongType}, lexicographic for
+ *       {@code StringType}) must match commit order, because the netChanges
+ *       post-processing path sorts rows of a given row identity by this column to
+ *       determine the first and last events.</li>
+ *   <li>{@code _commit_timestamp} (TIMESTAMP) -- the timestamp of the commit. All rows
+ *       belonging to a single {@code _commit_version} must share the same
+ *       {@code _commit_timestamp}. For streaming reads with post-processing enabled,
+ *       two additional requirements apply:
+ *       <ol>
+ *         <li>All rows of a single commit must appear in the same micro-batch (i.e.
+ *             micro-batch boundaries align with commit boundaries).</li>
+ *         <li>Each micro-batch's rows must have {@code _commit_timestamp} strictly
+ *             greater than the maximum {@code _commit_timestamp} of any prior
+ *             micro-batch.</li>
+ *       </ol>
+ *       Streaming post-processing uses {@code _commit_timestamp} as event time with a
+ *       zero-delay watermark, so once a micro-batch observes max event time T the
+ *       global watermark advances to T. Both Spark's late-event filter and its
+ *       state-eviction predicate then use {@code eventTime <= T} -- so any later row
+ *       at {@code _commit_timestamp <= T} (whether from the same commit split across
+ *       batches, a different commit emitted later, or simply an out-of-order commit)
+ *       is silently dropped as late. Requirement 1 keeps a single commit's rows
+ *       together; requirement 2 keeps distinct commits in strictly increasing
+ *       event-time order across batches. Multiple distinct commits with equal
+ *       {@code _commit_timestamp} are allowed within a single micro-batch -- only
+ *       <em>across</em> batches does timestamp progression need to be strictly
+ *       increasing. Atomic-commit CDC connectors (e.g. Delta versions, Iceberg
+ *       snapshots) that derive {@code _commit_timestamp} from wall-clock time at
+ *       commit time naturally satisfy both requirements.
+ *       {@code _commit_timestamp} must be non-{@code NULL} on every row of a streaming
+ *       read engaging post-processing; both the row-level Aggregate path and the
+ *       netChanges {@code transformWithState} path raise
+ *       {@code CHANGELOG_CONTRACT_VIOLATION.NULL_COMMIT_TIMESTAMP} on a violation</li>
  * </ul>
+ * <p>
+ * Streaming reads support carry-over removal, update detection, and net change
+ * computation. Two streaming-specific behaviors to be aware of:
+ * <ul>
+ *   <li><b>Output is buffered until the watermark advances past the commit.</b>
+ *       When a micro-batch ingests a commit, that commit's output rows are
+ *       buffered in state and not emitted in the same batch. They are emitted
+ *       by a later micro-batch -- whichever one advances the watermark past
+ *       the commit's {@code _commit_timestamp}. The last commit's output is
+ *       emitted when the source terminates.</li>
+ *   <li><b>netChanges only merges changes that are buffered together.</b>
+ *       When each row identity appears in at most one commit within any
+ *       buffered window, the streaming output is the same as
+ *       {@code computeUpdates}. Cross-commit merging only happens when
+ *       several commits touch the same row before the earliest one's output
+ *       has been released. For full-range collapse, use a batch read.</li>
+ * </ul>
+ * <p>
+ * <b>Pushdown contract.</b> When any post-processing pass applies (carry-over
+ * removal, update detection, or netChanges), Spark only pushes predicates
+ * that reference {@code _commit_version}, {@code _commit_timestamp}, or
+ * columns named by {@link #rowId()} to the connector's
+ * {@link org.apache.spark.sql.connector.read.SupportsPushDownFilters} /
+ * {@link org.apache.spark.sql.connector.read.SupportsPushDownV2Filters}.
+ * Predicates on {@code _change_type}, the {@link #rowVersion()} column, or
+ * non-rowId data columns are kept above the scan: pushing them would drop
+ * one half of a delete/insert pair within a row-identity group and silently
+ * break post-processing. Catalyst's pushdown rules enforce this via the
+ * rewrite operators, so connectors do not need to code the restriction
+ * themselves -- but must not bypass it via connector-specific options. When
+ * no post-processing pass applies, Spark does not impose any CDC-specific
+ * predicate-pushdown restriction.
+ * {@link org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns}
+ * (column pruning) is unrestricted in either case: Spark's pruning already
+ * respects what the rewrite operators reference.
  *
  * @since 4.2.0
  */
 @Evolving
 public interface Changelog {
 
+  /** Constant for the {@code _change_type} value of a row inserted into the table. */
+  String CHANGE_TYPE_INSERT = "insert";
+  /** Constant for the {@code _change_type} value of a row deleted from the table. */
+  String CHANGE_TYPE_DELETE = "delete";
+  /** Constant for the {@code _change_type} value of an update's pre-image row. */
+  String CHANGE_TYPE_UPDATE_PREIMAGE = "update_preimage";
+  /** Constant for the {@code _change_type} value of an update's post-image row. */
+  String CHANGE_TYPE_UPDATE_POSTIMAGE = "update_postimage";
+
   /** A name to identify this changelog. */
   String name();
 
@@ -72,6 +148,9 @@ public interface Changelog {
    * Spark will collapse multiple changes per row identity into the net effect.
    * If {@code false}, the connector guarantees at most one change per row identity across
    * the entire changelog range, and Spark will skip net change computation.
+   * <p>
+   * Note this flag is range-scoped (across all commits in the request), not
+   * micro-batch-scoped.
    */
   boolean containsIntermediateChanges();
 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ChangelogInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ChangelogContext.java
similarity index 82%
rename from sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ChangelogInfo.java
rename to sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ChangelogContext.java
index 04a6d055f56b7..e5e4cd380fe0d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ChangelogInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ChangelogContext.java
@@ -20,34 +20,46 @@
 import java.util.Objects;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
 
 /**
  * Encapsulates the parameters of a Change Data Capture (CDC) query, passed from the
  * parser / DataFrame API to the catalog's
- * {@link TableCatalog#loadChangelog(Identifier, ChangelogInfo)} method.
+ * {@link TableCatalog#loadChangelog(Identifier, ChangelogContext, CaseInsensitiveStringMap)}
+ * method.
  *
  * @since 4.2.0
  */
 @Evolving
-public class ChangelogInfo {
+public class ChangelogContext {
 
   /**
    * Deduplication modes controlling how Spark post-processes raw change data.
    */
   public enum DeduplicationMode {
     /** Raw change rows as-is from the connector — no post-processing. */
-    NONE,
+    NONE("none"),
     /** Remove identical insert/delete pairs from copy-on-write file rewrites (default). */
-    DROP_CARRYOVERS,
+    DROP_CARRYOVERS("dropCarryovers"),
     /** Collapse to one net change per row identity across the entire changelog range. */
-    NET_CHANGES
+    NET_CHANGES("netChanges");
+
+    private final String value;
+
+    DeduplicationMode(String value) {
+      this.value = value;
+    }
+
+    public String value() {
+      return value;
+    }
   }
 
   private final ChangelogRange range;
   private final DeduplicationMode deduplicationMode;
   private final boolean computeUpdates;
 
-  public ChangelogInfo(
+  public ChangelogContext(
       ChangelogRange range,
       DeduplicationMode deduplicationMode,
       boolean computeUpdates) {
@@ -68,7 +80,7 @@ public ChangelogInfo(
   @Override
   public boolean equals(Object o) {
     if (this == o) return true;
-    if (!(o instanceof ChangelogInfo that)) return false;
+    if (!(o instanceof ChangelogContext that)) return false;
     return computeUpdates == that.computeUpdates
         && Objects.equals(range, that.range)
         && deduplicationMode == that.deduplicationMode;
@@ -81,7 +93,7 @@ public int hashCode() {
 
   @Override
   public String toString() {
-    return "ChangelogInfo{range=" + range +
+    return "ChangelogContext{range=" + range +
         ", deduplicationMode=" + deduplicationMode +
         ", computeUpdates=" + computeUpdates + "}";
   }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Column.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Column.java
index 8b32940d7a657..537c2edd11285 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Column.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Column.java
@@ -53,7 +53,16 @@ static Column create(
       boolean nullable,
       String comment,
       String metadataInJSON) {
-    return new ColumnImpl(name, dataType, nullable, comment, null, null, null, metadataInJSON);
+    return new ColumnImpl(
+        name,
+        dataType,
+        nullable,
+        comment,
+        /* defaultValue = */ null,
+        /* generationExpression = */ null,
+        /* identityColumnSpec = */ null,
+        metadataInJSON,
+        /* id = */ null);
   }
 
   static Column create(
@@ -63,8 +72,16 @@ static Column create(
       String comment,
       ColumnDefaultValue defaultValue,
       String metadataInJSON) {
-    return new ColumnImpl(name, dataType, nullable, comment, defaultValue,
-            null, null, metadataInJSON);
+    return new ColumnImpl(
+        name,
+        dataType,
+        nullable,
+        comment,
+        defaultValue,
+        /* generationExpression = */ null,
+        /* identityColumnSpec = */ null,
+        metadataInJSON,
+        /* id = */ null);
   }
 
   static Column create(
@@ -74,8 +91,16 @@ static Column create(
       String comment,
       String generationExpression,
       String metadataInJSON) {
-    return new ColumnImpl(name, dataType, nullable, comment, null,
-            generationExpression, null, metadataInJSON);
+    return new ColumnImpl(
+        name,
+        dataType,
+        nullable,
+        comment,
+        /* defaultValue = */ null,
+        generationExpression,
+        /* identityColumnSpec = */ null,
+        metadataInJSON,
+        /* id = */ null);
   }
 
   static Column create(
@@ -85,8 +110,16 @@ static Column create(
           String comment,
           IdentityColumnSpec identityColumnSpec,
           String metadataInJSON) {
-    return new ColumnImpl(name, dataType, nullable, comment, null,
-            null, identityColumnSpec, metadataInJSON);
+    return new ColumnImpl(
+        name,
+        dataType,
+        nullable,
+        comment,
+        /* defaultValue = */ null,
+        /* generationExpression = */ null,
+        identityColumnSpec,
+        metadataInJSON,
+        /* id = */ null);
   }
 
   /**
@@ -136,4 +169,36 @@ static Column create(
    */
   @Nullable
   String metadataInJSON();
+
+  /**
+   * Returns the ID of this top-level column, or null. The ID is an opt-in identifier that the
+   * connector uses to track column identity beyond column name and type.
+   * <p>
+   * When a non-null ID is returned, the connector commits to the following contract:
+   * <ul>
+   *   <li>The ID is stable across renames (logical name changes preserve the ID).</li>
+   *   <li>The ID changes when a top-level column is dropped and re-added, even with the same
+   *       name and type.</li>
+   *   <li>IDs are not reused within a table's history.</li>
+   * </ul>
+   * <p>
+   * When null is returned, Spark skips identity validation for that column. Connectors should
+   * return null when:
+   * <ul>
+   *   <li>The catalog has no notion of column identity beyond name and type, OR</li>
+   *   <li>The connector chooses to treat same-name drop+re-add as the same column
+   *       (lenient semantics).</li>
+   * </ul>
+   * Returning null is per-column: a connector may return IDs for some columns and null for
+   * others.
+   * <p>
+   * This API covers top-level columns only. Nested struct fields, array elements, and map
+   * keys/values do not have separate IDs. Connectors that track nested field IDs can encode
+   * them into the returned top-level Column ID string to detect nested changes, since Spark
+   * only compares string equality.
+   */
+  @Nullable
+  default String id() {
+    return null;
+  }
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java
index 786821514822e..421785568bb71 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java
@@ -111,6 +111,13 @@ public Table createTable(
     return asTableCatalog().createTable(ident, columns, partitions, properties);
   }
 
+  @Override
+  public Table createTableLike(
+      Identifier ident, TableInfo tableInfo, Table sourceTable)
+      throws TableAlreadyExistsException, NoSuchNamespaceException {
+    return asTableCatalog().createTableLike(ident, tableInfo, sourceTable);
+  }
+
   @Override
   public Table alterTable(
       Identifier ident,
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Dependency.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Dependency.java
new file mode 100644
index 0000000000000..4de02606b981f
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Dependency.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * Represents a dependency of a SQL object such as a view or metric view.
+ * <p>
+ * A dependency is one of: {@link TableDependency} or {@link FunctionDependency}. The
+ * {@code sealed} declaration enforces this structurally.
+ * <p>
+ * Note: today the only producer in Spark itself is metric-view dependency extraction, which
+ * emits {@link TableDependency} only. {@link FunctionDependency} and the
+ * {@link #function(String[])} factory are exposed as groundwork for future producers
+ * (e.g. SQL UDF dependency tracking); consumers iterating a {@link DependencyList} received
+ * from Spark today should expect to see only {@link TableDependency} instances.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public sealed interface Dependency permits TableDependency, FunctionDependency {
+
+  /**
+   * Construct a {@link TableDependency} from the structural multi-part name of the dependent
+   * table. {@code nameParts} should contain at least one element; for catalog-managed tables
+   * the first element is typically the catalog name and subsequent elements are namespace
+   * components followed by the table name.
+   */
+  static TableDependency table(String[] nameParts) {
+    return new TableDependency(nameParts);
+  }
+
+  /**
+   * Construct a {@link FunctionDependency} from the structural multi-part name of the
+   * dependent function. {@code nameParts} should contain at least one element; for
+   * catalog-managed functions the first element is typically the catalog name and subsequent
+   * elements are namespace components followed by the function name.
+   */
+  static FunctionDependency function(String[] nameParts) {
+    return new FunctionDependency(nameParts);
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DependencyList.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DependencyList.java
new file mode 100644
index 0000000000000..21c1e662fda62
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DependencyList.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * A list of dependencies for a SQL object such as a view or metric view.
+ * <p>
+ * <ul>
+ *   <li>When {@code null}, the dependency information is not provided.</li>
+ *   <li>When the array is empty, dependencies are provided but the object has none.</li>
+ *   <li>When the array is non-empty, each entry describes one dependency.</li>
+ * </ul>
+ * <p>
+ * Records' auto-generated {@code equals}/{@code hashCode} on array fields fall through to
+ * {@link Object#equals} (reference equality), so this record overrides them to use
+ * {@link Arrays#equals(Object[], Object[])} / {@link Arrays#hashCode(Object[])} on
+ * {@code dependencies}; per-element equality delegates to the element's overridden
+ * {@code equals} ({@link TableDependency} / {@link FunctionDependency} both implement value
+ * semantics on their {@code nameParts} array). The defensive-copy accessor override clones
+ * on read so callers cannot mutate the record's internal array.
+ *
+ * @param dependencies array of dependencies; must contain no null elements (defensive
+ *                     copy made; not validated element-wise -- callers passing nulls will
+ *                     surface NPEs in downstream consumers)
+ * @since 4.2.0
+ */
+@Evolving
+public record DependencyList(Dependency[] dependencies) {
+
+  public DependencyList {
+    Objects.requireNonNull(dependencies, "dependencies must not be null");
+    dependencies = dependencies.clone();
+  }
+
+  /** Returns a defensive copy of the underlying dependencies array. */
+  @Override
+  public Dependency[] dependencies() { return dependencies.clone(); }
+
+  @Override
+  public boolean equals(Object o) {
+    return o instanceof DependencyList that && Arrays.equals(dependencies, that.dependencies);
+  }
+
+  @Override
+  public int hashCode() { return Arrays.hashCode(dependencies); }
+
+  @Override
+  public String toString() {
+    return "DependencyList[dependencies=" + Arrays.toString(dependencies) + "]";
+  }
+
+  public static DependencyList of(Dependency[] dependencies) {
+    return new DependencyList(dependencies);
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionDependency.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionDependency.java
new file mode 100644
index 0000000000000..c19d118043afa
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/FunctionDependency.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * A function dependency of a SQL object.
+ * <p>
+ * The dependent function is identified by its structural multi-part name. See
+ * {@link TableDependency} for the parts-form contract.
+ * <p>
+ * Records' auto-generated {@code equals}/{@code hashCode} on array fields fall through to
+ * {@link Object#equals} (reference equality), so this record overrides them to use
+ * {@link Arrays#equals(Object[], Object[])} / {@link Arrays#hashCode(Object[])} on
+ * {@code nameParts} and give value-based semantics. The defensive-copy accessor override
+ * also clones on read so callers cannot mutate the record's internal array.
+ *
+ * @param nameParts structural multi-part identifier; must be non-empty and contain no
+ *                  null elements (defensive copy made; not validated element-wise --
+ *                  callers passing nulls will surface NPEs in downstream consumers)
+ * @since 4.2.0
+ */
+@Evolving
+public record FunctionDependency(String[] nameParts) implements Dependency {
+  public FunctionDependency {
+    Objects.requireNonNull(nameParts, "nameParts must not be null");
+    if (nameParts.length == 0) {
+      throw new IllegalArgumentException("nameParts must not be empty");
+    }
+    nameParts = nameParts.clone();
+  }
+
+  /** Returns a defensive copy of the underlying parts array. */
+  @Override
+  public String[] nameParts() { return nameParts.clone(); }
+
+  @Override
+  public boolean equals(Object o) {
+    return o instanceof FunctionDependency that && Arrays.equals(nameParts, that.nameParts);
+  }
+
+  @Override
+  public int hashCode() { return Arrays.hashCode(nameParts); }
+
+  @Override
+  public String toString() {
+    return "FunctionDependency[nameParts=" + Arrays.toString(nameParts) + "]";
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataTable.java
new file mode 100644
index 0000000000000..1d1acfde80f9d
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataTable.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.catalog.constraints.Constraint;
+import org.apache.spark.sql.connector.expressions.Transform;
+
+/**
+ * A concrete {@code Table} implementation that contains only table metadata, deferring
+ * read/write to Spark. It represents a general Spark data source table or a Spark view;
+ * Spark resolves the table provider into a data source (for tables) or expands the view text
+ * (for views) at read time.
+ * <p>
+ * Catalogs build the metadata via {@link TableInfo.Builder} (for data-source tables) or
+ * {@link ViewInfo.Builder} (for views). A {@code MetadataTable} wrapping a
+ * {@link TableInfo} can be returned from {@link TableCatalog#loadTable(Identifier)} for a
+ * data-source table; a {@code MetadataTable} wrapping a {@link ViewInfo} can be returned
+ * from {@link TableViewCatalog#loadTableOrView(Identifier)} as the single-RPC perf opt-in
+ * for a view.
+ * Downstream consumers distinguish the two by checking
+ * {@code getTableInfo() instanceof ViewInfo}.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public class MetadataTable implements Table {
+  private final TableInfo info;
+  private final String name;
+
+  /**
+   * @param info metadata for the table or view. Pass a {@link ViewInfo} for a view.
+   * @param name human-readable name for this table, returned by {@link #name()} and surfaced
+   *             in places that read it (e.g. {@code BatchScan} plan-tree labels and
+   *             partition-management error messages). {@code DESCRIBE TABLE EXTENDED} does
+   *             not read this field; it emits the resolved identifier as structured
+   *             {@code Catalog} / {@code Namespace} / {@code Table} rows. Catalogs returning
+   *             a {@code MetadataTable} from {@link TableCatalog#loadTable} or
+   *             {@link TableViewCatalog#loadTableOrView} should typically pass
+   *             {@code ident.toString()}, matching the quoted multi-part form used elsewhere
+   *             for v2 identifiers.
+   */
+  public MetadataTable(TableInfo info, String name) {
+    this.info = Objects.requireNonNull(info, "info should not be null");
+    this.name = Objects.requireNonNull(name, "name should not be null");
+  }
+
+  public TableInfo getTableInfo() {
+    return info;
+  }
+
+  @Override
+  public Column[] columns() {
+    return info.columns();
+  }
+
+  @Override
+  public Map<String, String> properties() {
+    return Collections.unmodifiableMap(info.properties());
+  }
+
+  @Override
+  public Transform[] partitioning() {
+    return info.partitions();
+  }
+
+  @Override
+  public Constraint[] constraints() {
+    return info.constraints();
+  }
+
+  @Override
+  public String name() {
+    return name;
+  }
+
+  @Override
+  public Set<TableCapability> capabilities() {
+    return Set.of();
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
index d5a36cd8bfb86..f64c34ee0e071 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
@@ -25,19 +25,25 @@
 import org.apache.spark.sql.errors.QueryCompilationErrors;
 import org.apache.spark.sql.errors.QueryExecutionErrors;
 import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
 
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.Set;
 
 /**
- * Catalog methods for working with Tables.
+ * Catalog API for connectors that expose tables.
+ * <p>
+ * Connectors that expose <i>only</i> tables implement this interface. Connectors that expose
+ * both tables and views must implement {@link TableViewCatalog} (which extends both this
+ * interface and {@link ViewCatalog} and adds the cross-cutting contract for the combined
+ * case); the methods on this interface remain table-only -- they do not interact with views.
  * <p>
  * TableCatalog implementations may be case-sensitive or case-insensitive. Spark will pass
  * {@link Identifier table identifiers} without modification. Field names passed to
- * {@link #alterTable(Identifier, TableChange...)} will be normalized to match the case used in the
- * table schema when updating, renaming, or dropping existing columns when catalyst analysis is
- * case-insensitive.
+ * {@link #alterTable(Identifier, TableChange...)} will be normalized to match the case used in
+ * the table schema when updating, renaming, or dropping existing columns when catalyst
+ * analysis is case-insensitive.
  *
  * @since 3.0.0
  */
@@ -99,8 +105,6 @@ public interface TableCatalog extends CatalogPlugin {
 
   /**
    * List the tables in a namespace from the catalog.
-   * <p>
-   * If the catalog supports views, this must return identifiers for only tables and not views.
    *
    * @param namespace a multi-part namespace
    * @return an array of Identifiers for tables
@@ -111,11 +115,14 @@ public interface TableCatalog extends CatalogPlugin {
   /**
    * List the table summaries in a namespace from the catalog.
    * <p>
-   * This method should return all tables entities from a catalog regardless of type (i.e. views
-   * should be listed as well).
+   * Returns one summary per entry returned by {@link #listTables}. Each {@link TableSummary}
+   * carries the entry's {@code tableType}.
+   * <p>
+   * The default implementation enumerates via {@link #listTables} + {@link #loadTable}.
+   * Catalogs that can fetch summaries in a single round-trip should override.
    *
    * @param namespace a multi-part namespace
-   * @return an array of Identifiers for tables
+   * @return an array of summaries for tables in the namespace
    * @throws NoSuchNamespaceException If the namespace does not exist (optional).
    * @throws NoSuchTableException If certain table listed by listTables API does not exist.
    */
@@ -139,27 +146,21 @@ default TableSummary[] listTableSummaries(String[] namespace)
 
   /**
    * Load table metadata by {@link Identifier identifier} from the catalog.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must throw {@link NoSuchTableException}.
    *
    * @param ident a table identifier
    * @return the table's metadata
-   * @throws NoSuchTableException If the table doesn't exist or is a view
+   * @throws NoSuchTableException If the table doesn't exist
    */
   Table loadTable(Identifier ident) throws NoSuchTableException;
 
   /**
    * Load table metadata by {@link Identifier identifier} from the catalog. Spark will write data
    * into this table later.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must throw {@link NoSuchTableException}.
    *
    * @param ident a table identifier
    * @param writePrivileges
    * @return the table's metadata
-   * @throws NoSuchTableException If the table doesn't exist or is a view
+   * @throws NoSuchTableException If the table doesn't exist
    *
    * @since 3.5.3
    */
@@ -171,14 +172,11 @@ default Table loadTable(
 
   /**
    * Load table metadata of a specific version by {@link Identifier identifier} from the catalog.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must throw {@link NoSuchTableException}.
    *
    * @param ident a table identifier
    * @param version version of the table
    * @return the table's metadata
-   * @throws NoSuchTableException If the table doesn't exist or is a view
+   * @throws NoSuchTableException If the table doesn't exist
    */
   default Table loadTable(Identifier ident, String version) throws NoSuchTableException {
     throw QueryCompilationErrors.noSuchTableError(name(), ident);
@@ -186,14 +184,11 @@ default Table loadTable(Identifier ident, String version) throws NoSuchTableExce
 
   /**
    * Load table metadata at a specific time by {@link Identifier identifier} from the catalog.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must throw {@link NoSuchTableException}.
    *
    * @param ident a table identifier
    * @param timestamp timestamp of the table, which is microseconds since 1970-01-01 00:00:00 UTC
    * @return the table's metadata
-   * @throws NoSuchTableException If the table doesn't exist or is a view
+   * @throws NoSuchTableException If the table doesn't exist
    */
   default Table loadTable(Identifier ident, long timestamp) throws NoSuchTableException {
     throw QueryCompilationErrors.noSuchTableError(name(), ident);
@@ -201,22 +196,25 @@ default Table loadTable(Identifier ident, long timestamp) throws NoSuchTableExce
 
   /**
    * Load a {@link Changelog} for the given table, representing the row-level changes within the
-   * range specified by {@code changelogInfo}.
+   * range specified by {@code context}.
    * <p>
    * The default implementation throws an analysis exception indicating that the catalog does
    * not support CDC. Catalogs that support CDC must override this method.
    *
    * @param ident a table identifier
-   * @param changelogInfo the CDC query parameters (range, deduplication mode, etc.)
+   * @param context the CDC query context (range, deduplication mode, etc.)
+   * @param options all options passed to the changelog query, including the CDC-recognized
+   *                keys (range, deduplication mode, etc.) that are also parsed into {@code context}
    * @return a Changelog instance for the requested table and range
    * @throws NoSuchTableException If the table doesn't exist
    *
    * @since 4.2.0
    */
-  default Changelog loadChangelog(Identifier ident, ChangelogInfo changelogInfo)
-      throws NoSuchTableException {
-    throw new UnsupportedOperationException(
-        name() + " does not support Change Data Capture (CDC)");
+  default Changelog loadChangelog(
+      Identifier ident,
+      ChangelogContext context,
+      CaseInsensitiveStringMap options) throws NoSuchTableException {
+    throw new UnsupportedOperationException(name() + " does not support Change Data Capture (CDC)");
   }
 
   /**
@@ -232,12 +230,9 @@ default void invalidateTable(Identifier ident) {
 
   /**
    * Test whether a table exists using an {@link Identifier identifier} from the catalog.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must return false.
    *
    * @param ident a table identifier
-   * @return true if the table exists, false otherwise
+   * @return true if a table exists at {@code ident}, false otherwise
    */
   default boolean tableExists(Identifier ident) {
     try {
@@ -281,11 +276,11 @@ default Table createTable(
    * Create a table in the catalog.
    *
    * @param ident a table identifier
-   * @param tableInfo information about the table.
+   * @param tableInfo information about the table
    * @return metadata for the new table. This can be null if getting the metadata for the new table
    *         is expensive. Spark will call {@link #loadTable(Identifier)} if needed (e.g. CTAS).
    *
-   * @throws TableAlreadyExistsException If a table or view already exists for the identifier
+   * @throws TableAlreadyExistsException If a table already exists for the identifier
    * @throws UnsupportedOperationException If a requested partition transform is not supported
    * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional)
    * @since 4.1.0
@@ -317,7 +312,7 @@ default Table createTable(Identifier ident, TableInfo tableInfo)
    *                    or other custom state from this object to clone additional metadata
    * @return metadata for the new table
    *
-   * @throws TableAlreadyExistsException If a table or view already exists for the identifier
+   * @throws TableAlreadyExistsException If a table already exists for the identifier
    * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional)
    * @throws UnsupportedOperationException If the catalog does not support CREATE TABLE LIKE
    * @since 4.2.0
@@ -343,16 +338,13 @@ default boolean useNullableQuerySchema() {
    * changes should be applied to the table.
    * <p>
    * The requested changes must be applied in the order given.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must throw {@link NoSuchTableException}.
    *
    * @param ident a table identifier
    * @param changes changes to apply to the table
    * @return updated metadata for the table. This can be null if getting the metadata for the
    *         updated table is expensive. Spark always discard the returned table here.
    *
-   * @throws NoSuchTableException If the table doesn't exist or is a view
+   * @throws NoSuchTableException If the table doesn't exist
    * @throws IllegalArgumentException If any change is rejected by the implementation.
    */
   Table alterTable(
@@ -361,9 +353,6 @@ Table alterTable(
 
   /**
    * Drop a table in the catalog.
-   * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must not drop the view and must return false.
    *
    * @param ident a table identifier
    * @return true if a table was deleted, false if no table exists for the identifier
@@ -374,9 +363,6 @@ Table alterTable(
    * Drop a table in the catalog and completely remove its data by skipping a trash even if it is
    * supported.
    * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table, this
-   * must not drop the view and must return false.
-   * <p>
    * If the catalog supports to purge a table, this method should be overridden.
    * The default implementation throws {@link UnsupportedOperationException}.
    *
@@ -393,17 +379,13 @@ default boolean purgeTable(Identifier ident) throws UnsupportedOperationExceptio
   /**
    * Renames a table in the catalog.
    * <p>
-   * If the catalog supports views and contains a view for the old identifier and not a table, this
-   * throws {@link NoSuchTableException}. Additionally, if the new identifier is a table or a view,
-   * this throws {@link TableAlreadyExistsException}.
-   * <p>
    * If the catalog does not support table renames between namespaces, it throws
    * {@link UnsupportedOperationException}.
    *
    * @param oldIdent the table identifier of the existing table to rename
    * @param newIdent the new table identifier of the table
-   * @throws NoSuchTableException If the table to rename doesn't exist or is a view
-   * @throws TableAlreadyExistsException If the new table name already exists or is a view
+   * @throws NoSuchTableException If the table to rename doesn't exist
+   * @throws TableAlreadyExistsException If the new table name already exists
    * @throws UnsupportedOperationException If the namespaces of old and new identifiers do not
    *                                       match (optional)
    */
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableDependency.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableDependency.java
new file mode 100644
index 0000000000000..0aa9a47311607
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableDependency.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * A table dependency of a SQL object.
+ * <p>
+ * The dependent table is identified by its structural multi-part name. {@code nameParts}
+ * arity matches the catalog's namespace depth plus one for the table name -- for a catalog
+ * with single-level namespaces the parts are {@code [catalog, schema, table]}; for a catalog
+ * with multi-level namespaces (e.g. Iceberg with {@code db1.db2}) the parts are
+ * {@code [catalog, db1, db2, ..., table]}; for v1 sources resolved through the session
+ * catalog, producers should normalize to {@code [spark_catalog, db, table]} so consumers see
+ * a stable arity per source kind. The structural form preserves arity and is unambiguous
+ * against quoted identifiers containing a literal {@code .}; consumers that need a flat
+ * string should join the parts themselves with a quoting scheme appropriate to their wire
+ * format.
+ * <p>
+ * Records' auto-generated {@code equals}/{@code hashCode} on array fields fall through to
+ * {@link Object#equals} (reference equality), so this record overrides them to use
+ * {@link Arrays#equals(Object[], Object[])} / {@link Arrays#hashCode(Object[])} on
+ * {@code nameParts} and give value-based semantics. The defensive-copy accessor override
+ * also clones on read so callers cannot mutate the record's internal array.
+ *
+ * @param nameParts structural multi-part identifier; must be non-empty and contain no
+ *                  null elements (defensive copy made; not validated element-wise --
+ *                  callers passing nulls will surface NPEs in downstream consumers)
+ * @since 4.2.0
+ */
+@Evolving
+public record TableDependency(String[] nameParts) implements Dependency {
+  public TableDependency {
+    Objects.requireNonNull(nameParts, "nameParts must not be null");
+    if (nameParts.length == 0) {
+      throw new IllegalArgumentException("nameParts must not be empty");
+    }
+    nameParts = nameParts.clone();
+  }
+
+  /** Returns a defensive copy of the underlying parts array. */
+  @Override
+  public String[] nameParts() { return nameParts.clone(); }
+
+  @Override
+  public boolean equals(Object o) {
+    return o instanceof TableDependency that && Arrays.equals(nameParts, that.nameParts);
+  }
+
+  @Override
+  public int hashCode() { return Arrays.hashCode(nameParts); }
+
+  @Override
+  public String toString() {
+    return "TableDependency[nameParts=" + Arrays.toString(nameParts) + "]";
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java
index 9870a3b0fa45d..89709c9f1c2f0 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java
@@ -33,9 +33,8 @@ public class TableInfo {
 
   /**
    * Constructor for TableInfo used by the builder.
-   * @param builder Builder.
    */
-  private TableInfo(Builder builder) {
+  protected TableInfo(BaseBuilder<?> builder) {
     this.columns = builder.columns;
     this.properties = builder.properties;
     this.partitions = builder.partitions;
@@ -60,35 +59,96 @@ public Transform[] partitions() {
 
   public Constraint[] constraints() { return constraints; }
 
-  public static class Builder {
-    private Column[] columns = new Column[0];
-    private Map<String, String> properties = new HashMap<>();
-    private Transform[] partitions = new Transform[0];
-    private Constraint[] constraints = new Constraint[0];
+  public static class Builder extends BaseBuilder<Builder> {
+    @Override
+    protected Builder self() { return this; }
 
-    public Builder withColumns(Column[] columns) {
+    @Override
+    public TableInfo build() {
+      Objects.requireNonNull(columns, "columns should not be null");
+      return new TableInfo(this);
+    }
+  }
+
+  /**
+   * Shared builder state for {@link TableInfo} and its subclasses. Setters return {@code B} so
+   * subclass builders (e.g. {@link ViewInfo.Builder}) chain through their own type without
+   * a covariant override on each inherited setter.
+   */
+  protected abstract static class BaseBuilder<B extends BaseBuilder<B>> {
+    protected Column[] columns = new Column[0];
+    protected Map<String, String> properties = new HashMap<>();
+    protected Transform[] partitions = new Transform[0];
+    protected Constraint[] constraints = new Constraint[0];
+
+    protected abstract B self();
+
+    public B withColumns(Column[] columns) {
       this.columns = columns;
-      return this;
+      return self();
     }
 
-    public Builder withProperties(Map<String, String> properties) {
-      this.properties = properties;
-      return this;
+    public B withSchema(StructType schema) {
+      this.columns = CatalogV2Util.structTypeToV2Columns(schema);
+      return self();
     }
 
-    public Builder withPartitions(Transform[] partitions) {
+    /**
+     * Replaces the current properties map with a defensive copy of the given map. Any reserved
+     * keys set earlier via convenience setters (e.g. {@link #withProvider}) are discarded --
+     * call those setters <i>after</i> this method, not before.
+     */
+    public B withProperties(Map<String, String> properties) {
+      this.properties = new HashMap<>(properties);
+      return self();
+    }
+
+    public B withPartitions(Transform[] partitions) {
       this.partitions = partitions;
-      return this;
+      return self();
     }
 
-    public Builder withConstraints(Constraint[] constraints) {
+    public B withConstraints(Constraint[] constraints) {
       this.constraints = constraints;
-      return this;
+      return self();
     }
 
-    public TableInfo build() {
-      Objects.requireNonNull(columns, "columns should not be null");
-      return new TableInfo(this);
+    // Convenience setters below write reserved keys into the current `properties` map. Pair
+    // each with a preceding `withProperties(...)` call if you want to start from a user map;
+    // calling `withProperties` after a convenience setter discards the value the convenience
+    // setter wrote.
+
+    /** Writes {@link TableCatalog#PROP_PROVIDER} into the current properties map. */
+    public B withProvider(String provider) {
+      properties.put(TableCatalog.PROP_PROVIDER, provider);
+      return self();
+    }
+
+    public B withLocation(String location) {
+      properties.put(TableCatalog.PROP_LOCATION, location);
+      return self();
     }
+
+    public B withComment(String comment) {
+      properties.put(TableCatalog.PROP_COMMENT, comment);
+      return self();
+    }
+
+    public B withCollation(String collation) {
+      properties.put(TableCatalog.PROP_COLLATION, collation);
+      return self();
+    }
+
+    public B withOwner(String owner) {
+      properties.put(TableCatalog.PROP_OWNER, owner);
+      return self();
+    }
+
+    public B withTableType(String tableType) {
+      properties.put(TableCatalog.PROP_TABLE_TYPE, tableType);
+      return self();
+    }
+
+    public abstract TableInfo build();
   }
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableSummary.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableSummary.java
index 8f46a372342a8..17a4f23bdd1f2 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableSummary.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableSummary.java
@@ -27,6 +27,7 @@ public interface TableSummary {
     String EXTERNAL_TABLE_TYPE = "EXTERNAL";
     String VIEW_TABLE_TYPE = "VIEW";
     String FOREIGN_TABLE_TYPE = "FOREIGN";
+    String METRIC_VIEW_TABLE_TYPE = "METRIC_VIEW";
 
     Identifier identifier();
     String tableType();
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableViewCatalog.java
new file mode 100644
index 0000000000000..45ec41d680d8b
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableViewCatalog.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connector.catalog;
+
+import java.util.ArrayList;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
+import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
+import org.apache.spark.sql.catalyst.analysis.NoSuchViewException;
+
+/**
+ * Catalog API for connectors that expose both tables and views in a single shared identifier
+ * namespace.
+ * <p>
+ * Connectors that expose <i>both</i> tables and views must implement {@code TableViewCatalog};
+ * implementing {@link TableCatalog} and {@link ViewCatalog} directly without
+ * {@code TableViewCatalog} is rejected at catalog initialization. Connectors that expose only
+ * tables implement just {@link TableCatalog}; connectors that expose only views implement just
+ * {@link ViewCatalog}; this interface is not relevant to them.
+ *
+ * <h2>Two principles</h2>
+ *
+ * A {@code TableViewCatalog} follows two rules that, taken together, define every cross-cutting
+ * subtlety:
+ * <ol>
+ *   <li><b>Orthogonal interfaces.</b> Every {@link TableCatalog} method behaves as if views did
+ *       not exist, and every {@link ViewCatalog} method behaves as if tables did not exist.
+ *       From the perspective of a {@code TableCatalog} caller, a view at an identifier is
+ *       indistinguishable from "nothing there"; symmetrically for {@code ViewCatalog} on
+ *       tables. The implementation, of course, knows about both kinds -- it just filters them
+ *       apart at each method boundary.</li>
+ *   <li><b>Single identifier namespace.</b> Tables and views share one keyspace within a
+ *       namespace; the same {@link Identifier} cannot resolve to both at the same time. The
+ *       implementation typically enforces this with a single backing keyspace plus a kind
+ *       discriminator.</li>
+ * </ol>
+ *
+ * <h2>Per-method cross-type behavior</h2>
+ *
+ * <b>Active rejection</b> (write-side methods that throw on cross-type collision):
+ * <table>
+ *   <caption>Cross-type rejection</caption>
+ *   <tr><th>Method</th><th>Rejects when</th><th>Throws</th></tr>
+ *   <tr><td>{@link TableCatalog#createTable}</td><td>a view sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link TableCatalog#renameTable}</td>
+ *       <td>a view sits at {@code newIdent}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#createView}</td><td>a table sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#createOrReplaceView}</td><td>a table sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#replaceView}</td><td>a table sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.NoSuchViewException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#renameView}</td>
+ *       <td>a table sits at {@code newIdent}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ * </table>
+ *
+ * <b>Passive filtering</b> (read / non-collision mutation methods that behave as if the wrong
+ * kind doesn't exist):
+ * <table>
+ *   <caption>Cross-type filtering</caption>
+ *   <tr><th>Method</th><th>On wrong-kind ident</th></tr>
+ *   <tr><td>{@link TableCatalog#loadTable(Identifier)}</td>
+ *       <td>throws {@code NoSuchTableException} for a view</td></tr>
+ *   <tr><td>{@link TableCatalog#loadTable(Identifier, String)} /
+ *       {@link TableCatalog#loadTable(Identifier, long)}</td>
+ *       <td>throws {@code NoSuchTableException} for a view (no perf opt-in -- time-travel does
+ *       not apply to views)</td></tr>
+ *   <tr><td>{@link TableCatalog#tableExists}</td><td>returns {@code false} for a view</td></tr>
+ *   <tr><td>{@link TableCatalog#dropTable} / {@link TableCatalog#purgeTable}</td>
+ *       <td>returns {@code false} for a view; does not drop it</td></tr>
+ *   <tr><td>{@link TableCatalog#renameTable}</td>
+ *       <td>throws {@code NoSuchTableException} when the source is a view</td></tr>
+ *   <tr><td>{@link TableCatalog#listTables}</td><td>tables only</td></tr>
+ *   <tr><td>{@link ViewCatalog#loadView}</td>
+ *       <td>throws {@code NoSuchViewException} for a table</td></tr>
+ *   <tr><td>{@link ViewCatalog#viewExists}</td><td>returns {@code false} for a table</td></tr>
+ *   <tr><td>{@link ViewCatalog#dropView}</td>
+ *       <td>returns {@code false} for a table; does not drop it</td></tr>
+ *   <tr><td>{@link ViewCatalog#renameView}</td>
+ *       <td>throws {@code NoSuchViewException} when the source is a table</td></tr>
+ *   <tr><td>{@link ViewCatalog#listViews}</td><td>views only</td></tr>
+ * </table>
+ *
+ * <h2>Single-RPC perf entry points</h2>
+ *
+ * The orthogonal {@link TableCatalog} and {@link ViewCatalog} answer two cross-cutting
+ * questions in two round trips each. {@code TableViewCatalog} adds dedicated methods so a
+ * catalog can answer both in one round trip:
+ * <ul>
+ *   <li>{@link #loadTableOrView(Identifier)} -- the resolver's per-identifier read path. Returns
+ *       a regular {@link Table} for a table, or a {@link MetadataTable} wrapping a
+ *       {@link ViewInfo} for a view. Saves the {@code loadTable} -> {@code loadView} fallback
+ *       on a cold cache.</li>
+ *   <li>{@link #listTableAndViewSummaries(String[])} -- a unified listing of tables and views
+ *       with the kind preserved on each {@link TableSummary}. Default impl performs both
+ *       {@link TableCatalog#listTableSummaries} and {@link ViewCatalog#listViews}; override to
+ *       fetch in one round trip.</li>
+ * </ul>
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public interface TableViewCatalog extends TableCatalog, ViewCatalog {
+
+  /**
+   * Load metadata for an identifier that may resolve to either a table or a view.
+   * <p>
+   * For a table, returns the table's {@link Table}. For a view, returns a
+   * {@link MetadataTable} wrapping a {@link ViewInfo}; callers discriminate via
+   * {@code getTableInfo() instanceof ViewInfo}. This lets the resolver answer in a single RPC
+   * instead of falling back from {@link TableCatalog#loadTable} to {@link ViewCatalog#loadView}.
+   *
+   * @param ident the identifier
+   * @return a {@link Table} for tables, or a {@link MetadataTable} wrapping a
+   *         {@link ViewInfo} for views
+   * @throws NoSuchTableException if neither a table nor a view exists at {@code ident}
+   */
+  Table loadTableOrView(Identifier ident) throws NoSuchTableException;
+
+  /**
+   * List the tables and views in a namespace, returned as {@link TableSummary} entries with
+   * the kind preserved on each summary.
+   * <p>
+   * The default implementation enumerates via {@link TableCatalog#listTableSummaries} for
+   * tables and {@link ViewCatalog#listViews} for views (two round trips). Catalogs that can
+   * fetch the unified listing in a single round trip should override.
+   *
+   * @param namespace a multi-part namespace
+   * @return an array of summaries for both tables and views in the namespace
+   * @throws NoSuchNamespaceException if the namespace does not exist (optional)
+   * @throws NoSuchTableException if a table listed by the underlying enumeration disappears
+   *                              before its summary can be assembled (default impl only)
+   */
+  default TableSummary[] listTableAndViewSummaries(String[] namespace)
+      throws NoSuchNamespaceException, NoSuchTableException {
+    TableSummary[] tableSummaries = listTableSummaries(namespace);
+    Identifier[] viewIdentifiers = listViews(namespace);
+    ArrayList<TableSummary> all = new ArrayList<>(
+        tableSummaries.length + viewIdentifiers.length);
+    for (TableSummary s : tableSummaries) {
+      all.add(s);
+    }
+    for (Identifier id : viewIdentifiers) {
+      all.add(TableSummary.of(id, TableSummary.VIEW_TABLE_TYPE));
+    }
+    return all.toArray(TableSummary[]::new);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadTableOrView}: a {@link MetadataTable}
+   * wrapping a {@link ViewInfo} is rejected as not-a-table; anything else is returned. Override
+   * only if a tables-only path is materially cheaper than the unified one.
+   */
+  @Override
+  default Table loadTable(Identifier ident) throws NoSuchTableException {
+    Table t = loadTableOrView(ident);
+    if (t instanceof MetadataTable mot && mot.getTableInfo() instanceof ViewInfo) {
+      throw new NoSuchTableException(ident);
+    }
+    return t;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadTableOrView}: a {@link MetadataTable}
+   * wrapping a {@link ViewInfo} is unwrapped and returned; anything else (table or absent) is
+   * surfaced as {@link NoSuchViewException}. Override only if a views-only path is materially
+   * cheaper than the unified one.
+   */
+  @Override
+  default ViewInfo loadView(Identifier ident) throws NoSuchViewException {
+    Table t;
+    try {
+      t = loadTableOrView(ident);
+    } catch (NoSuchTableException e) {
+      throw new NoSuchViewException(ident);
+    }
+    if (t instanceof MetadataTable mot && mot.getTableInfo() instanceof ViewInfo vi) {
+      return vi;
+    }
+    throw new NoSuchViewException(ident);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadTableOrView}: returns {@code true} only if
+   * the entry exists and is not a view. Override only if a cheaper existence-check path exists.
+   */
+  @Override
+  default boolean tableExists(Identifier ident) {
+    try {
+      Table t = loadTableOrView(ident);
+      return !(t instanceof MetadataTable mot && mot.getTableInfo() instanceof ViewInfo);
+    } catch (NoSuchTableException e) {
+      return false;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadTableOrView}: returns {@code true} only if
+   * the entry exists and is a view. Override only if a cheaper existence-check path exists.
+   */
+  @Override
+  default boolean viewExists(Identifier ident) {
+    try {
+      Table t = loadTableOrView(ident);
+      return t instanceof MetadataTable mot && mot.getTableInfo() instanceof ViewInfo;
+    } catch (NoSuchTableException e) {
+      return false;
+    }
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TransactionalCatalogPlugin.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TransactionalCatalogPlugin.java
new file mode 100644
index 0000000000000..daa3176dcbba5
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TransactionalCatalogPlugin.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.catalog.transactions.Transaction;
+import org.apache.spark.sql.connector.catalog.transactions.TransactionInfo;
+
+/**
+ * A {@link CatalogPlugin} that supports transactions.
+ * <p>
+ * Catalogs that implement this interface opt in to transactional query execution. A catalog
+ * implementing this interface is responsible for starting transactions.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public interface TransactionalCatalogPlugin extends CatalogPlugin {
+
+  /**
+   * Begins a new transaction and returns a {@link Transaction} representing it.
+   *
+   * @param info metadata about the transaction being started.
+   */
+  Transaction beginTransaction(TransactionInfo info);
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java
index 4bc2aa6e18ae6..058e105758c72 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TruncatableTable.java
@@ -18,6 +18,8 @@
 package org.apache.spark.sql.connector.catalog;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.metric.CustomMetric;
+import org.apache.spark.sql.connector.metric.CustomTaskMetric;
 
 /**
  * Represents a table which can be atomically truncated.
@@ -34,4 +36,25 @@ public interface TruncatableTable extends Table {
    * @since 3.2.0
    */
   boolean truncateTable();
+
+  /**
+   * Returns an array of supported custom metrics with name and description.
+   * By default it returns empty array.
+   *
+   * @since 4.2.0
+   */
+  default CustomMetric[] supportedCustomMetrics() {
+    return new CustomMetric[]{};
+  }
+
+  /**
+   * Returns an array of custom metrics which are collected with values at the driver side only.
+   * Note that these metrics must be included in the supported custom metrics reported by
+   * `supportedCustomMetrics`.
+   *
+   * @since 4.2.0
+   */
+  default CustomTaskMetric[] reportDriverMetrics() {
+    return new CustomTaskMetric[]{};
+  }
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java
deleted file mode 100644
index a4dc5f2f2d20f..0000000000000
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.connector.catalog;
-
-import java.util.Map;
-
-import org.apache.spark.annotation.DeveloperApi;
-import org.apache.spark.sql.types.StructType;
-
-/**
- * An interface representing a persisted view.
- */
-@DeveloperApi
-public interface View {
-  /**
-   * A name to identify this view.
-   */
-  String name();
-
-  /**
-   * The view query SQL text.
-   */
-  String query();
-
-  /**
-   * The current catalog when the view is created.
-   */
-  String currentCatalog();
-
-  /**
-   * The current namespace when the view is created.
-   */
-  String[] currentNamespace();
-
-  /**
-   * The schema for the view when the view is created after applying column aliases.
-   */
-  StructType schema();
-
-  /**
-   * The output column names of the query that creates this view.
-   */
-  String[] queryColumnNames();
-
-  /**
-   * The view column aliases.
-   */
-  String[] columnAliases();
-
-  /**
-   * The view column comments.
-   */
-  String[] columnComments();
-
-  /**
-   * The view properties.
-   */
-  Map<String, String> properties();
-}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java
index abe5fb3148d08..0e74b22079bfa 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java
@@ -14,185 +14,151 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.spark.sql.connector.catalog;
 
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.spark.annotation.DeveloperApi;
+import org.apache.spark.annotation.Evolving;
 import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
 import org.apache.spark.sql.catalyst.analysis.NoSuchViewException;
 import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException;
 
 /**
- * Catalog methods for working with views.
+ * Catalog API for connectors that expose views.
+ * <p>
+ * Connectors that expose <i>only</i> views implement this interface. Connectors that expose
+ * both tables and views must implement {@link TableViewCatalog} (which extends both this
+ * interface and {@link TableCatalog} and adds the cross-cutting contract for the combined
+ * case); the methods on this interface remain view-only -- they do not interact with tables.
+ * <p>
+ * The presence of {@code ViewCatalog} on the catalog plugin <i>is</i> the signal that it
+ * supports views; there is no capability flag to declare.
+ *
+ * @since 4.2.0
  */
-@DeveloperApi
+@Evolving
 public interface ViewCatalog extends CatalogPlugin {
 
-  /**
-   * A reserved property to specify the description of the view.
-   */
-  String PROP_COMMENT = "comment";
-
-  /**
-   * A reserved property to specify the owner of the view.
-   */
-  String PROP_OWNER = "owner";
-
-  /**
-   * A reserved property to specify the software version used to create the view.
-   */
-  String PROP_CREATE_ENGINE_VERSION = "create_engine_version";
-
-  /**
-   * A reserved property to specify the software version used to change the view.
-   */
-  String PROP_ENGINE_VERSION = "engine_version";
-
-  /**
-   * All reserved properties of the view.
-   */
-  List<String> RESERVED_PROPERTIES = Arrays.asList(
-        PROP_COMMENT,
-        PROP_OWNER,
-        PROP_CREATE_ENGINE_VERSION,
-        PROP_ENGINE_VERSION);
-
   /**
    * List the views in a namespace from the catalog.
-   * <p>
-   * If the catalog supports tables, this must return identifiers for only views and not tables.
    *
    * @param namespace a multi-part namespace
-   * @return an array of Identifiers for views
-   * @throws NoSuchNamespaceException If the namespace does not exist (optional).
+   * @return an array of identifiers for views
+   * @throws NoSuchNamespaceException if the namespace does not exist (optional)
    */
-  Identifier[] listViews(String... namespace) throws NoSuchNamespaceException;
+  Identifier[] listViews(String[] namespace) throws NoSuchNamespaceException;
 
   /**
-   * Load view metadata by {@link Identifier ident} from the catalog.
-   * <p>
-   * If the catalog supports tables and contains a table for the identifier and not a view,
-   * this must throw {@link NoSuchViewException}.
+   * Load view metadata by identifier.
    *
    * @param ident a view identifier
-   * @return the view description
-   * @throws NoSuchViewException If the view doesn't exist or is a table
+   * @return the view metadata
+   * @throws NoSuchViewException if the view does not exist
    */
-  View loadView(Identifier ident) throws NoSuchViewException;
+  ViewInfo loadView(Identifier ident) throws NoSuchViewException;
 
   /**
-   * Invalidate cached view metadata for an {@link Identifier identifier}.
+   * Test whether a view exists.
    * <p>
-   * If the view is already loaded or cached, drop cached data. If the view does not exist or is
-   * not cached, do nothing. Calling this method should not query remote services.
+   * The default implementation calls {@link #loadView} and catches {@link NoSuchViewException}.
+   * Catalogs that can answer existence cheaply should override.
    *
    * @param ident a view identifier
+   * @return true if a view exists at {@code ident}, false otherwise
    */
-  default void invalidateView(Identifier ident) {
+  default boolean viewExists(Identifier ident) {
+    try {
+      loadView(ident);
+      return true;
+    } catch (NoSuchViewException e) {
+      return false;
+    }
   }
 
   /**
-   * Test whether a view exists using an {@link Identifier identifier} from the catalog.
+   * Invalidate cached metadata for a view.
    * <p>
-   * If the catalog supports views and contains a view for the identifier and not a table,
-   * this must return false.
+   * If the view is currently cached, drop the cached entry; otherwise do nothing. This must not
+   * issue remote calls.
    *
    * @param ident a view identifier
-   * @return true if the view exists, false otherwise
    */
-  default boolean viewExists(Identifier ident) {
-    try {
-      return loadView(ident) != null;
-    } catch (NoSuchViewException e) {
-      return false;
-    }
+  default void invalidateView(Identifier ident) {
   }
 
   /**
-   * Create a view in the catalog.
+   * Create a view.
    *
-   * @param viewInfo the info class holding all view information
-   * @return the created view. This can be null if getting the metadata for the view is expensive
-   * @throws ViewAlreadyExistsException If a view or table already exists for the identifier
-   * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional)
+   * @param ident the view identifier
+   * @param info  the view metadata
+   * @return the metadata of the newly created view; may equal {@code info}
+   * @throws ViewAlreadyExistsException if a view already exists at {@code ident}
+   * @throws NoSuchNamespaceException   if the identifier's namespace does not exist (optional)
    */
-  View createView(ViewInfo viewInfo) throws ViewAlreadyExistsException, NoSuchNamespaceException;
+  ViewInfo createView(Identifier ident, ViewInfo info)
+      throws ViewAlreadyExistsException, NoSuchNamespaceException;
 
   /**
-   * Replace a view in the catalog.
+   * Atomically replace an existing view's metadata.
    * <p>
-   * The default implementation has a race condition.
-   * Catalogs are encouraged to implement this operation atomically.
+   * Used by {@code ALTER VIEW ... AS}. Implementations should commit the new metadata
+   * atomically; views carry no data, so a single transactional metastore call (or equivalent)
+   * is sufficient -- there is no separate staging API.
    *
-   * @param viewInfo the info class holding all view information
-   * @param orCreate create the view if it doesn't exist
-   * @return the created/replaced view. This can be null if getting the metadata
-   *         for the view is expensive
-   * @throws NoSuchViewException If the view doesn't exist or is a table
-   * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional)
+   * @param ident the view identifier
+   * @param info  the new view metadata
+   * @return the metadata of the replaced view; may equal {@code info}
+   * @throws NoSuchViewException if no view exists at {@code ident}
    */
-  default View replaceView(
-      ViewInfo viewInfo,
-      boolean orCreate)
-      throws NoSuchViewException, NoSuchNamespaceException {
-    if (viewExists(viewInfo.ident())) {
-      dropView(viewInfo.ident());
-    } else if (!orCreate) {
-      throw new NoSuchViewException(viewInfo.ident());
-    }
-
-    try {
-      return createView(viewInfo);
-    } catch (ViewAlreadyExistsException e) {
-      throw new RuntimeException("Race condition when creating/replacing view", e);
-    }
-  }
+  ViewInfo replaceView(Identifier ident, ViewInfo info) throws NoSuchViewException;
 
   /**
-   * Apply {@link ViewChange changes} to a view in the catalog.
+   * Create a view if one does not exist at {@code ident}, or atomically replace it if one does.
    * <p>
-   * Implementations may reject the requested changes. If any change is rejected, none of the
-   * changes should be applied to the view.
+   * Used by {@code CREATE OR REPLACE VIEW}. The default implementation calls
+   * {@link #replaceView}, falling back to {@link #createView} on
+   * {@link NoSuchViewException}. The fallback is non-atomic across the two calls (a concurrent
+   * drop or create can race), so catalogs that can answer the upsert in a single transactional
+   * call should override this method to collapse to one RPC and to make the swap atomic.
    *
-   * @param ident a view identifier
-   * @param changes an array of changes to apply to the view
-   * @return the view altered
-   * @throws NoSuchViewException If the view doesn't exist or is a table.
-   * @throws IllegalArgumentException If any change is rejected by the implementation.
+   * @param ident the view identifier
+   * @param info  the view metadata
+   * @return the metadata of the created or replaced view; may equal {@code info}
+   * @throws ViewAlreadyExistsException if {@code ident} cannot host this view -- either a
+   *                                    concurrent {@code CREATE VIEW} won the race in the
+   *                                    default impl's gap between {@link #replaceView} and
+   *                                    the fallback {@link #createView}, or, in a
+   *                                    {@link TableViewCatalog}, a table sits at {@code ident}
+   * @throws NoSuchNamespaceException   if the identifier's namespace does not exist (optional)
    */
-  View alterView(Identifier ident, ViewChange... changes)
-      throws NoSuchViewException, IllegalArgumentException;
+  default ViewInfo createOrReplaceView(Identifier ident, ViewInfo info)
+      throws ViewAlreadyExistsException, NoSuchNamespaceException {
+    try {
+      return replaceView(ident, info);
+    } catch (NoSuchViewException e) {
+      return createView(ident, info);
+    }
+  }
 
   /**
-   * Drop a view in the catalog.
-   * <p>
-   * If the catalog supports tables and contains a table for the identifier and not a view, this
-   * must not drop the table and must return false.
+   * Drop a view.
    *
    * @param ident a view identifier
-   * @return true if a view was deleted, false if no view exists for the identifier
+   * @return true if a view was dropped, false otherwise
    */
   boolean dropView(Identifier ident);
 
   /**
-   * Rename a view in the catalog.
-   * <p>
-   * If the catalog supports tables and contains a table with the old identifier, this throws
-   * {@link NoSuchViewException}. Additionally, if it contains a table with the new identifier,
-   * this throws {@link ViewAlreadyExistsException}.
+   * Rename a view.
    * <p>
-   * If the catalog does not support view renames between namespaces, it throws
-   * {@link UnsupportedOperationException}.
+   * If the catalog supports tables and contains a table at the new identifier, this must throw
+   * {@link ViewAlreadyExistsException}. If the source identifier resolves to a table rather than
+   * a view, this must throw {@link NoSuchViewException}. The cross-type contract for catalogs
+   * that expose both tables and views lives on {@link TableViewCatalog}.
    *
    * @param oldIdent the view identifier of the existing view to rename
-   * @param newIdent the new view identifier of the view
-   * @throws NoSuchViewException If the view to rename doesn't exist or is a table
-   * @throws ViewAlreadyExistsException If the new view name already exists or is a table
-   * @throws UnsupportedOperationException If the namespaces of old and new identifiers do not
-   * match (optional)
+   * @param newIdent the new view identifier
+   * @throws NoSuchViewException        if no view exists at {@code oldIdent}
+   * @throws ViewAlreadyExistsException if a view (or, in a {@link TableViewCatalog}, a table)
+   *                                    already exists at {@code newIdent}
    */
   void renameView(Identifier oldIdent, Identifier newIdent)
       throws NoSuchViewException, ViewAlreadyExistsException;
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java
deleted file mode 100644
index c94933beed7f6..0000000000000
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.connector.catalog;
-
-import org.apache.spark.annotation.DeveloperApi;
-
-/**
- * ViewChange subclasses represent requested changes to a view.
- * These are passed to {@link ViewCatalog#alterView}.
- */
-@DeveloperApi
-public interface ViewChange {
-
-  /**
-   * Create a ViewChange for setting a table property.
-   *
-   * @param property the property name
-   * @param value the new property value
-   * @return a ViewChange
-   */
-  static ViewChange setProperty(String property, String value) {
-    return new SetProperty(property, value);
-  }
-
-  /**
-   * Create a ViewChange for removing a table property.
-   *
-   * @param property the property name
-   * @return a ViewChange
-   */
-  static ViewChange removeProperty(String property) {
-    return new RemoveProperty(property);
-  }
-
-  final class SetProperty implements ViewChange {
-    private final String property;
-    private final String value;
-
-    private SetProperty(String property, String value) {
-      this.property = property;
-      this.value = value;
-    }
-
-    public String property() {
-      return property;
-    }
-
-    public String value() {
-      return value;
-    }
-  }
-
-  final class RemoveProperty implements ViewChange {
-    private final String property;
-
-    private RemoveProperty(String property) {
-      this.property = property;
-    }
-
-    public String property() {
-      return property;
-    }
-  }
-}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java
index b01e133365661..0f46e915a9be2 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java
@@ -14,168 +14,159 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.spark.sql.connector.catalog;
 
-import org.apache.spark.annotation.DeveloperApi;
-import org.apache.spark.sql.types.StructType;
-
-import javax.annotation.Nonnull;
-
-import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.Objects;
-import java.util.StringJoiner;
+
+import org.apache.spark.annotation.Evolving;
 
 /**
- * A class that holds view information.
+ * View metadata DTO -- the typed payload returned by {@link ViewCatalog#loadView} and accepted
+ * by {@link ViewCatalog#createView} / {@link ViewCatalog#replaceView}. Carries the
+ * view-specific fields that cannot be represented as string table properties: the query text,
+ * captured creation-time resolution context, captured SQL configs, schema-binding mode, and
+ * query output column names. Schema and user TBLPROPERTIES are inherited from {@link TableInfo}
+ * via the typed builder.
+ * <p>
+ * {@code ViewInfo} extends {@link TableInfo} so that a {@link TableViewCatalog} can opt into the
+ * single-RPC perf path by returning a {@link MetadataTable} wrapping a {@code ViewInfo}
+ * from {@link TableViewCatalog#loadTableOrView} for a view identifier. Pure {@link ViewCatalog}
+ * implementations never see {@code TableInfo}; the typed setters on {@link Builder} cover
+ * everything they need to construct a {@code ViewInfo}.
+ *
+ * @since 4.2.0
  */
-@DeveloperApi
-public class ViewInfo {
-  private final Identifier ident;
-  private final String sql;
+@Evolving
+public class ViewInfo extends TableInfo {
+
+  private final String queryText;
   private final String currentCatalog;
   private final String[] currentNamespace;
-  private final StructType schema;
+  private final Map<String, String> sqlConfigs;
+  private final String schemaMode;
   private final String[] queryColumnNames;
-  private final String[] columnAliases;
-  private final String[] columnComments;
-  private final Map<String, String> properties;
-
-  public ViewInfo(
-      Identifier ident,
-      String sql,
-      String currentCatalog,
-      String[] currentNamespace,
-      StructType schema,
-      String[] queryColumnNames,
-      String[] columnAliases,
-      String[] columnComments,
-      Map<String, String> properties) {
-    this.ident = ident;
-    this.sql = sql;
-    this.currentCatalog = currentCatalog;
-    this.currentNamespace = currentNamespace;
-    this.schema = schema;
-    this.queryColumnNames = queryColumnNames;
-    this.columnAliases = columnAliases;
-    this.columnComments = columnComments;
-    this.properties = properties;
+  private final DependencyList viewDependencies;
+
+  protected ViewInfo(Builder builder) {
+    super(builder);
+    this.queryText = Objects.requireNonNull(builder.queryText, "queryText should not be null");
+    this.currentCatalog = builder.currentCatalog;
+    this.currentNamespace = builder.currentNamespace;
+    this.sqlConfigs = Collections.unmodifiableMap(builder.sqlConfigs);
+    this.schemaMode = builder.schemaMode;
+    this.queryColumnNames = builder.queryColumnNames;
+    this.viewDependencies = builder.viewDependencies;
+    // Default PROP_TABLE_TYPE = VIEW so `properties()` reflects the typed ViewInfo
+    // classification. Callers can refine to a more specific view kind (for example,
+    // METRIC_VIEW) by calling BaseBuilder.withTableType(...) on the builder before build().
+    properties().putIfAbsent(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE);
   }
 
-  /**
-   * @return The view identifier
-   */
-  @Nonnull
-  public Identifier ident() {
-    return ident;
-  }
+  /** The SQL text of the view. */
+  public String queryText() { return queryText; }
 
   /**
-   * @return The SQL text that defines the view
+   * The current catalog at the time the view was created, used to resolve unqualified
+   * identifiers in {@link #queryText()} at read time. May be {@code null} if the view was
+   * created with no captured resolution context.
    */
-  @Nonnull
-  public String sql() {
-    return sql;
-  }
+  public String currentCatalog() { return currentCatalog; }
 
   /**
-   * @return The current catalog
+   * The current namespace at the time the view was created, used alongside
+   * {@link #currentCatalog()} to resolve unqualified identifiers in {@link #queryText()} at
+   * read time. Never {@code null}; empty when no namespace was captured.
    */
-  @Nonnull
-  public String currentCatalog() {
-    return currentCatalog;
-  }
+  public String[] currentNamespace() { return currentNamespace; }
 
   /**
-   * @return The current namespace
+   * The SQL configs captured at view creation time, applied when parsing and analyzing the
+   * view body. Keys are unprefixed SQL config names (e.g. {@code spark.sql.ansi.enabled}).
    */
-  @Nonnull
-  public String[] currentNamespace() {
-    return currentNamespace;
-  }
+  public Map<String, String> sqlConfigs() { return sqlConfigs; }
 
   /**
-   * @return The view query output schema
+   * The view's schema binding mode. Allowed values match the {@code toString} form of
+   * {@code org.apache.spark.sql.catalyst.analysis.ViewSchemaMode}:
+   * {@code BINDING}, {@code COMPENSATION}, {@code TYPE EVOLUTION}, {@code EVOLUTION}.
+   * May be {@code null} when schema binding is not configured.
    */
-  @Nonnull
-  public StructType schema() {
-    return schema;
-  }
+  public String schemaMode() { return schemaMode; }
 
   /**
-   * @return The query column names
+   * Output column names of the query that created the view, used to map the query output to
+   * the view's declared columns during view resolution. Empty for views in {@code EVOLUTION}
+   * mode, which always use the view's current schema.
    */
-  @Nonnull
-  public String[] queryColumnNames() {
-    return queryColumnNames;
-  }
+  public String[] queryColumnNames() { return queryColumnNames; }
 
   /**
-   * @return The column aliases
+   * Returns the structured list of objects this view depends on (source tables and functions),
+   * or {@code null} if no dependency list was supplied. Unlike other view metadata which is
+   * encoded into {@link #properties()}, dependency lists are a first-class field because their
+   * nested structure does not round-trip cleanly through flat string properties.
    */
-  @Nonnull
-  public String[] columnAliases() {
-    return columnAliases;
-  }
+  public DependencyList viewDependencies() { return viewDependencies; }
+
+  public static class Builder extends BaseBuilder<Builder> {
+    private String queryText;
+    private String currentCatalog;
+    private String[] currentNamespace = new String[0];
+    private Map<String, String> sqlConfigs = new HashMap<>();
+    private String schemaMode;
+    private String[] queryColumnNames = new String[0];
+    private DependencyList viewDependencies = null;
+
+    @Override
+    protected Builder self() { return this; }
+
+    public Builder withQueryText(String queryText) {
+      this.queryText = queryText;
+      return this;
+    }
 
-  /**
-   * @return The column comments
-   */
-  @Nonnull
-  public String[] columnComments() {
-    return columnComments;
-  }
+    public Builder withCurrentCatalog(String currentCatalog) {
+      this.currentCatalog = currentCatalog;
+      return this;
+    }
 
-  /**
-   * @return The view properties
-   */
-  @Nonnull
-  public Map<String, String> properties() {
-    return properties;
-  }
+    public Builder withCurrentNamespace(String[] currentNamespace) {
+      this.currentNamespace = currentNamespace == null ? new String[0] : currentNamespace;
+      return this;
+    }
 
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
+    public Builder withSqlConfigs(Map<String, String> sqlConfigs) {
+      this.sqlConfigs = new HashMap<>(sqlConfigs);
+      return this;
     }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
+
+    public Builder withSchemaMode(String schemaMode) {
+      this.schemaMode = schemaMode;
+      return this;
     }
-    ViewInfo viewInfo = (ViewInfo) o;
-    return ident.equals(viewInfo.ident) && sql.equals(viewInfo.sql) &&
-        currentCatalog.equals(viewInfo.currentCatalog) &&
-        Arrays.equals(currentNamespace, viewInfo.currentNamespace) &&
-        schema.equals(viewInfo.schema) &&
-        Arrays.equals(queryColumnNames, viewInfo.queryColumnNames) &&
-        Arrays.equals(columnAliases, viewInfo.columnAliases) &&
-        Arrays.equals(columnComments, viewInfo.columnComments) &&
-        properties.equals(viewInfo.properties);
-  }
 
-  @Override
-  public int hashCode() {
-    int result = Objects.hash(ident, sql, currentCatalog, schema, properties);
-    result = 31 * result + Arrays.hashCode(currentNamespace);
-    result = 31 * result + Arrays.hashCode(queryColumnNames);
-    result = 31 * result + Arrays.hashCode(columnAliases);
-    result = 31 * result + Arrays.hashCode(columnComments);
-    return result;
-  }
+    public Builder withQueryColumnNames(String[] queryColumnNames) {
+      this.queryColumnNames = queryColumnNames == null ? new String[0] : queryColumnNames;
+      return this;
+    }
 
-  @Override
-  public String toString() {
-    return new StringJoiner(", ", ViewInfo.class.getSimpleName() + "[", "]")
-        .add("ident=" + ident)
-        .add("sql='" + sql + "'")
-        .add("currentCatalog='" + currentCatalog + "'")
-        .add("currentNamespace=" + Arrays.toString(currentNamespace))
-        .add("schema=" + schema)
-        .add("queryColumnNames=" + Arrays.toString(queryColumnNames))
-        .add("columnAliases=" + Arrays.toString(columnAliases))
-        .add("columnComments=" + Arrays.toString(columnComments))
-        .add("properties=" + properties)
-        .toString();
+    /**
+     * Sets the structured dependency list for this view. Source tables and functions referenced
+     * by the view text should be recorded here so downstream consumers (e.g. catalogs persisting
+     * lineage) can access them without re-analyzing the view body.
+     */
+    public Builder withViewDependencies(DependencyList viewDependencies) {
+      this.viewDependencies = viewDependencies;
+      return this;
+    }
+
+    @Override
+    public ViewInfo build() {
+      Objects.requireNonNull(columns, "columns should not be null");
+      return new ViewInfo(this);
+    }
   }
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/constraints/BaseConstraint.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/constraints/BaseConstraint.java
index f93d716a27842..186d2b5dedade 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/constraints/BaseConstraint.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/constraints/BaseConstraint.java
@@ -65,24 +65,17 @@ public boolean rely() {
   public String toDDL() {
     // The validation status is not included in the DDL output as it's not part of
     // the Spark SQL syntax for constraints.
+    return "CONSTRAINT " + name + " " + toDescription();
+  }
+
+  public String toDescription() {
     return String.format(
-        "CONSTRAINT %s %s %s %s",
-        name,
+        "%s %s %s",
         definition(),
         enforced ? "ENFORCED" : "NOT ENFORCED",
         rely ? "RELY" : "NORELY");
   }
 
-  public String toDescription() {
-    StringJoiner joiner = new StringJoiner(" ");
-    joiner.add(definition());
-    joiner.add(enforced ? "ENFORCED" : "NOT ENFORCED");
-    if (rely) {
-      joiner.add("RELY");
-    }
-    return joiner.toString();
-  }
-
   @Override
   public String toString() {
     return toDDL();
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/transactions/Transaction.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/transactions/Transaction.java
new file mode 100644
index 0000000000000..130c66d05a325
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/transactions/Transaction.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog.transactions;
+
+import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.catalog.CatalogPlugin;
+import org.apache.spark.sql.connector.catalog.TransactionalCatalogPlugin;
+import org.apache.spark.sql.connector.read.Scan;
+
+import java.io.Closeable;
+
+/**
+ * Represents a transaction.
+ * <p>
+ * Spark begins a transaction with {@link TransactionalCatalogPlugin#beginTransaction} and
+ * executes read/write operations against the transaction's catalog. On success, Spark
+ * calls {@link #commit()}; on failure, Spark calls {@link #abort()}. In both cases Spark
+ * subsequently calls {@link #close()} to release resources.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public interface Transaction extends Closeable {
+
+  /**
+   * Returns the catalog associated with this transaction. This catalog is responsible for tracking
+   * read/write operations that occur within the boundaries of a transaction. This allows
+   * connectors to perform conflict resolution at commit time.
+   */
+  CatalogPlugin catalog();
+
+  /**
+   * Commits the transaction. All writes performed under it become visible to other readers.
+   * <p>
+   * The connector is responsible for detecting and resolving conflicting commits or throwing
+   * an exception if resolution is not possible.
+   * <p>
+   * This method will be called exactly once per transaction. Spark calls {@link #close()}
+   * immediately after this method returns.
+   *
+   * @throws IllegalStateException if the transaction has already been committed or aborted.
+   */
+  void commit();
+
+  /**
+   * Aborts the transaction, discarding any staged changes.
+   * <p>
+   * This method must be idempotent. If the transaction has already been committed or aborted,
+   * invoking it must have no effect.
+   * <p>
+   * Spark calls {@link #close()} immediately after this method returns.
+   */
+  void abort();
+
+  /**
+   * Attempts to register materialized scans against this transaction's read set.
+   * <p>
+   * An example use case is cache reuse. Spark passes the scans of a candidate cached subtree
+   * for the transaction's catalog and the connector decides whether to accept them.
+   * <p>
+   * The connector must either accept all passed scans (returning {@code true} after adding
+   * the scans to the read set) or refuse (returning {@code false} without modifying
+   * the read set).
+   *
+   * @param scans the materialized scans Spark offers for registration against
+   *              this transaction's read set.
+   * @return true if the connector accepts the scans; false otherwise.
+   */
+  boolean registerScans(Scan[] scans);
+
+  /**
+   * Releases any resources held by this transaction.
+   * <p>
+   * Spark always calls this method after {@link #commit()} or {@link #abort()}, regardless of
+   * whether those methods succeed or not.
+   * <p>
+   * This method must be idempotent. If the transaction has already been closed,
+   * invoking it must have no effect.
+   */
+  @Override
+  void close();
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/transactions/TransactionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/transactions/TransactionInfo.java
new file mode 100644
index 0000000000000..3e6979cec469f
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/transactions/TransactionInfo.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog.transactions;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * Metadata about a transaction.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public interface TransactionInfo {
+  /**
+   * Returns a unique identifier for this transaction.
+   */
+  String id();
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SampleMethod.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SampleMethod.java
new file mode 100644
index 0000000000000..b9af8f9d5ac7f
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SampleMethod.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.read;
+
+import org.apache.spark.annotation.Evolving;
+
+/**
+ * The sampling method for TABLESAMPLE.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public enum SampleMethod {
+  /** Row-level sampling (BERNOULLI). Each row is independently selected. */
+  BERNOULLI,
+  /** Block-level sampling (SYSTEM). Entire partitions/splits are included or skipped. */
+  SYSTEM
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTableSample.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTableSample.java
index 3630feb4680ea..000588e9c318e 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTableSample.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTableSample.java
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.connector.read;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.errors.QueryCompilationErrors;
 
 /**
  * A mix-in interface for {@link Scan}. Data sources can implement this interface to
@@ -29,11 +30,31 @@
 public interface SupportsPushDownTableSample extends ScanBuilder {
 
   /**
-   * Pushes down SAMPLE to the data source.
+   * Pushes down BERNOULLI (row-level) SAMPLE to the data source.
    */
-  boolean pushTableSample(
+  @Deprecated(since = "4.2.0")
+  default boolean pushTableSample(
       double lowerBound,
       double upperBound,
       boolean withReplacement,
-      long seed);
+      long seed) {
+    throw QueryCompilationErrors.mustOverrideOneMethodError("pushTableSample");
+  }
+
+  /**
+   * Pushes down SAMPLE to the data source with the specified sampling method.
+   */
+  default boolean pushTableSample(
+      double lowerBound,
+      double upperBound,
+      boolean withReplacement,
+      long seed,
+      SampleMethod sampleMethod) {
+    if (sampleMethod == SampleMethod.SYSTEM) {
+      // If the data source hasn't overridden this method, it must not have added support
+      // for SYSTEM sampling. Don't apply sample pushdown.
+      return false;
+    }
+    return pushTableSample(lowerBound, upperBound, withReplacement, seed);
+  }
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java
index 64727c0e40f2a..903cb70a79eba 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java
@@ -54,11 +54,17 @@
 public class V2ExpressionSQLBuilder {
 
   /**
-   * Escape the special chars for like pattern.
+   * Escape the LIKE pattern special chars, using {@code \} as the escape character. The LIKE
+   * patterns produced by {@link #visitStartsWith}, {@link #visitEndsWith} and
+   * {@link #visitContains} declare {@code ESCAPE '\'}, so the wildcards {@code _} and {@code %}
+   * and the escape character {@code \} itself must each be prefixed with {@code \} to be matched
+   * literally.
    *
    * Note: This method adopts the escape representation within Spark and is not bound to any JDBC
-   * dialect. JDBC dialect should overwrite this API if the underlying database have more special
-   * chars other than _ and %.
+   * dialect. A JDBC dialect should overwrite this API if the underlying database has more LIKE
+   * special chars than {@code _}, {@code %} and {@code \}. Escaping that is instead needed because
+   * the database treats a character specially inside a SQL <em>string literal</em> belongs in
+   * {@link #escapeStringLiteralForLikePattern}.
    */
   protected String escapeSpecialCharsForLikePattern(String str) {
     StringBuilder builder = new StringBuilder();
@@ -67,6 +73,7 @@ protected String escapeSpecialCharsForLikePattern(String str) {
       switch (c) {
         case '_' -> builder.append("\\_");
         case '%' -> builder.append("\\%");
+        case '\\' -> builder.append("\\\\");
         default -> builder.append(c);
       }
     }
@@ -74,6 +81,22 @@ protected String escapeSpecialCharsForLikePattern(String str) {
     return builder.toString();
   }
 
+  /**
+   * Escape the characters that the target database treats specially inside a SQL string literal,
+   * applied to a LIKE pattern (and its ESCAPE character) when embedding it into a {@code '...'}
+   * literal for predicate pushdown.
+   *
+   * The default returns the input unchanged: a standard SQL string literal is taken verbatim (the
+   * single-quote doubling is already applied when the literal is rendered), so the {@code \} that
+   * {@link #escapeSpecialCharsForLikePattern} uses as the LIKE escape character reaches the LIKE
+   * engine intact. A dialect whose string-literal syntax gives {@code \} a special meaning (e.g.
+   * MySQL, which treats {@code \} as an escape character inside string literals) must override this
+   * to double the backslash, so the LIKE pattern survives string-literal parsing unchanged.
+   */
+  protected String escapeStringLiteralForLikePattern(String str) {
+    return str;
+  }
+
   public String build(Expression expr) {
     if (expr instanceof Literal literal) {
       return visitLiteral(literal);
@@ -196,21 +219,33 @@ protected String visitStartsWith(String l, String r) {
     // Remove quotes at the beginning and end.
     // e.g. converts "'str'" to "str".
     String value = r.substring(1, r.length() - 1);
-    return l + " LIKE '" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'";
+    return likeWithEscape(l, escapeSpecialCharsForLikePattern(value) + "%");
   }
 
   protected String visitEndsWith(String l, String r) {
     // Remove quotes at the beginning and end.
     // e.g. converts "'str'" to "str".
     String value = r.substring(1, r.length() - 1);
-    return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "' ESCAPE '\\'";
+    return likeWithEscape(l, "%" + escapeSpecialCharsForLikePattern(value));
   }
 
   protected String visitContains(String l, String r) {
     // Remove quotes at the beginning and end.
     // e.g. converts "'str'" to "str".
     String value = r.substring(1, r.length() - 1);
-    return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'";
+    return likeWithEscape(l, "%" + escapeSpecialCharsForLikePattern(value) + "%");
+  }
+
+  /**
+   * Build a {@code <left> LIKE '<pattern>' ESCAPE '\'} predicate. The pattern already has its LIKE
+   * special chars escaped (via {@link #escapeSpecialCharsForLikePattern}); both the pattern and
+   * the {@code \} escape character are then passed through
+   * {@link #escapeStringLiteralForLikePattern} so a dialect can add any string-literal escaping
+   * its SQL syntax requires.
+   */
+  private String likeWithEscape(String l, String pattern) {
+    return l + " LIKE '" + escapeStringLiteralForLikePattern(pattern)
+      + "' ESCAPE '" + escapeStringLiteralForLikePattern("\\") + "'";
   }
 
   protected String inputToSQL(Expression input) {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/BatchWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/BatchWrite.java
index 44fc5f9d794bf..359cb7a354aac 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/BatchWrite.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/BatchWrite.java
@@ -85,6 +85,13 @@ default void onDataWriterCommit(WriterCommitMessage message) {}
    * disable this behavior by overriding {@link #useCommitCoordinator()}. If disabled, multiple
    * tasks may have committed successfully and one successful commit message per task will be
    * passed to this commit method. The remaining commit messages are ignored by Spark.
+   * <p>
+   * Note: this method signals that all data for this write operation has been successfully written.
+   * When this write is part of a
+   * {@link org.apache.spark.sql.connector.catalog.transactions.Transaction}, connector
+   * implementations should stage the written data durably but must not make it visible to readers.
+   * Changes are propagated and made visible only when the enclosing transaction is committed via
+   * {@link org.apache.spark.sql.connector.catalog.transactions.Transaction#commit()}.
    */
   void commit(WriterCommitMessage[] messages);
 
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/AnalysisWarning.scala b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/InsertSummary.java
similarity index 61%
rename from sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/AnalysisWarning.scala
rename to sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/InsertSummary.java
index 35b8185c255e1..40f41bf238447 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/AnalysisWarning.scala
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/InsertSummary.java
@@ -15,19 +15,20 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.pipelines
+package org.apache.spark.sql.connector.write;
 
-/** Represents a warning generated as part of graph analysis. */
-sealed trait AnalysisWarning
+import org.apache.spark.annotation.Evolving;
 
-object AnalysisWarning {
+/**
+ * Provides an informational summary of the INSERT operation producing write.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public interface InsertSummary extends WriteSummary {
 
   /**
-   * Warning that some streaming reader options are being dropped
-   *
-   * @param sourceName Source for which reader options are being dropped.
-   * @param droppedOptions Set of reader options that are being dropped for a specific source.
+   * Returns the number of inserted rows, or -1 if not found.
    */
-  case class StreamingReaderOptionsDropped(sourceName: String, droppedOptions: Seq[String])
-      extends AnalysisWarning
+  long numInsertedRows();
 }
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
index ab98bc01b3aed..f4759e675a5c3 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.connector.write.streaming;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.catalog.transactions.Transaction;
 import org.apache.spark.sql.connector.write.DataWriter;
 import org.apache.spark.sql.connector.write.PhysicalWriteInfo;
 import org.apache.spark.sql.connector.write.WriterCommitMessage;
@@ -80,6 +81,11 @@ default boolean useCommitCoordinator() {
    * The execution engine may call {@code commit} multiple times for the same epoch in some
    * circumstances. To support exactly-once data semantics, implementations must ensure that
    * multiple commits for the same epoch are idempotent.
+   * <p>
+   * Note: this method signals that all data for this write operation has been successfully written.
+   * When this write is part of a {@link Transaction}, connector implementations should stage the
+   * written data durably but must not make it visible to readers. Changes are propagated and made
+   * visible only when the enclosing transaction is committed via {@link Transaction#commit()}.
    */
   void commit(long epochId, WriterCommitMessage[] messages);
 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
index 019bc258579a8..8a47e93724d95 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
@@ -28,9 +28,8 @@
 import org.apache.spark.sql.catalyst.util.STUtils;
 import org.apache.spark.sql.util.ArrowUtils;
 import org.apache.spark.sql.types.*;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
@@ -150,27 +149,21 @@ public ColumnarMap getMap(int rowId) {
   }
 
   @Override
-  public GeographyVal getGeography(int rowId) {
+  public BinaryView getBinaryView(int rowId) {
     if (isNullAt(rowId)) return null;
 
-    GeographyType gt = (GeographyType) this.type;
-    int srid = getChild(0).getInt(rowId);
-    byte[] bytes = getChild(1).getBinary(rowId);
-    gt.assertSridAllowedForType(srid);
-    // TODO(GEO-602): Geog still does not support different SRIDs, once it does,
-    // we need to update this.
-    return (bytes == null) ? null : STUtils.stGeogFromWKB(bytes);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int rowId) {
-    if (isNullAt(rowId)) return null;
-
-    GeometryType gt = (GeometryType) this.type;
-    int srid = getChild(0).getInt(rowId);
-    byte[] bytes = getChild(1).getBinary(rowId);
-    gt.assertSridAllowedForType(srid);
-    return (bytes == null) ? null : STUtils.stGeomFromWKB(bytes, srid);
+    if (this.type instanceof GeographyType gt) {
+      int srid = getChild(0).getInt(rowId);
+      gt.assertSridAllowedForType(srid);
+      byte[] bytes = getChild(1).getBinary(rowId);
+      return (bytes == null) ? null : STUtils.stGeogFromWKB(bytes, srid);
+    } else if (this.type instanceof GeometryType gt) {
+      int srid = getChild(0).getInt(rowId);
+      gt.assertSridAllowedForType(srid);
+      byte[] bytes = getChild(1).getBinary(rowId);
+      return (bytes == null) ? null : STUtils.stGeomFromWKB(bytes, srid);
+    }
+    return super.getBinaryView(rowId);
   }
 
   public ArrowColumnVector(ValueVector vector) {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java
index 8e9a5a620b3e4..e119a1225ece5 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java
@@ -22,11 +22,10 @@
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.Decimal;
 import org.apache.spark.sql.types.UserDefinedType;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
 
 /**
  * An interface representing in-memory columnar data in Spark. This interface defines the main APIs
@@ -290,14 +289,14 @@ public final ColumnarRow getStruct(int rowId) {
    */
   public abstract byte[] getBinary(int rowId);
 
-  public GeographyVal getGeography(int rowId) {
-    byte[] bytes = getBinary(rowId);
-    return (bytes == null) ? null : GeographyVal.fromBytes(bytes);
-  }
-
-  public GeometryVal getGeometry(int rowId) {
+  /**
+   * Returns the opaque-bytes physical value at {@code rowId} as a {@link BinaryView}. Used by
+   * logical types whose physical representation is "an opaque chunk of bytes" - currently
+   * GEOMETRY and GEOGRAPHY. Returns {@code null} if the slot is null.
+   */
+  public BinaryView getBinaryView(int rowId) {
     byte[] bytes = getBinary(rowId);
-    return (bytes == null) ? null : GeometryVal.fromBytes(bytes);
+    return (bytes == null) ? null : BinaryView.fromBytes(bytes);
   }
 
   /**
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
index fad1817aca199..36701f0270215 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java
@@ -26,8 +26,7 @@
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 
 /**
  * Array abstraction in {@link ColumnVector}.
@@ -177,13 +176,8 @@ public byte[] getBinary(int ordinal) {
   }
 
   @Override
-  public GeographyVal getGeography(int ordinal) {
-    return data.getGeography(offset + ordinal);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int ordinal) {
-    return data.getGeometry(offset + ordinal);
+  public BinaryView getBinaryView(int ordinal) {
+    return data.getBinaryView(offset + ordinal);
   }
 
   @Override
@@ -213,7 +207,7 @@ public ColumnarMap getMap(int ordinal) {
 
   @Override
   public Object get(int ordinal, DataType dataType) {
-    return SpecializedGettersReader.read(this, ordinal, dataType, false, false);
+    return SpecializedGettersReader.read(this, ordinal, dataType, false, true);
   }
 
   @Override
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
index 3d1e780f6e057..9ea87da489254 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java
@@ -27,8 +27,7 @@
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 
 /**
  * This class wraps an array of {@link ColumnVector} and provides a row view.
@@ -74,10 +73,8 @@ public InternalRow copy() {
           row.update(i, getUTF8String(i).copy());
         } else if (pdt instanceof PhysicalBinaryType) {
           row.update(i, getBinary(i));
-        } else if (pdt instanceof PhysicalGeographyType) {
-          row.update(i, getGeography(i));
-        } else if (pdt instanceof PhysicalGeometryType) {
-          row.update(i, getGeometry(i));
+        } else if (pdt instanceof PhysicalBinaryViewType) {
+          row.update(i, getBinaryView(i).copy());
         } else if (pdt instanceof PhysicalDecimalType t) {
           row.setDecimal(i, getDecimal(i, t.precision(), t.scale()), t.precision());
         } else if (pdt instanceof PhysicalStructType t) {
@@ -141,13 +138,8 @@ public byte[] getBinary(int ordinal) {
   }
 
   @Override
-  public GeographyVal getGeography(int ordinal) {
-    return columns[ordinal].getGeography(rowId);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int ordinal) {
-    return columns[ordinal].getGeometry(rowId);
+  public BinaryView getBinaryView(int ordinal) {
+    return columns[ordinal].getBinaryView(rowId);
   }
 
   @Override
@@ -195,10 +187,8 @@ public Object get(int ordinal, DataType dataType) {
       return getUTF8String(ordinal);
     } else if (dataType instanceof BinaryType) {
       return getBinary(ordinal);
-    } else if (dataType instanceof GeographyType) {
-      return getGeography(ordinal);
-    } else if (dataType instanceof GeometryType) {
-      return getGeometry(ordinal);
+    } else if (dataType instanceof GeographyType || dataType instanceof GeometryType) {
+      return getBinaryView(ordinal);
     } else if (dataType instanceof DecimalType t) {
       return getDecimal(ordinal, t.precision(), t.scale());
     } else if (dataType instanceof DateType) {
@@ -215,6 +205,8 @@ public Object get(int ordinal, DataType dataType) {
       return getMap(ordinal);
     } else if (dataType instanceof VariantType) {
       return getVariant(ordinal);
+    } else if (dataType instanceof UserDefinedType<?> udt) {
+      return get(ordinal, udt.sqlType());
     } else {
       throw new SparkUnsupportedOperationException(
         "_LEGACY_ERROR_TEMP_3152", Map.of("dataType", String.valueOf(dataType)));
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
index 656c5f8a8f30e..d12d197222140 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java
@@ -25,8 +25,7 @@
 import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 
 /**
  * Row abstraction in {@link ColumnVector}.
@@ -79,10 +78,8 @@ public InternalRow copy() {
           row.update(i, getUTF8String(i).copy());
         } else if (pdt instanceof PhysicalBinaryType) {
           row.update(i, getBinary(i));
-        } else if (pdt instanceof PhysicalGeographyType) {
-          row.update(i, getGeography(i));
-        } else if (pdt instanceof PhysicalGeometryType) {
-          row.update(i, getGeometry(i));
+        } else if (pdt instanceof PhysicalBinaryViewType) {
+          row.update(i, getBinaryView(i).copy());
         } else if (pdt instanceof PhysicalDecimalType t) {
           row.setDecimal(i, getDecimal(i, t.precision(), t.scale()), t.precision());
         } else if (pdt instanceof PhysicalStructType t) {
@@ -146,13 +143,8 @@ public byte[] getBinary(int ordinal) {
   }
 
   @Override
-  public GeographyVal getGeography(int ordinal) {
-    return data.getChild(ordinal).getGeography(rowId);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int ordinal) {
-    return data.getChild(ordinal).getGeometry(rowId);
+  public BinaryView getBinaryView(int ordinal) {
+    return data.getChild(ordinal).getBinaryView(rowId);
   }
 
   @Override
@@ -217,6 +209,8 @@ public Object get(int ordinal, DataType dataType) {
       return getMap(ordinal);
     } else if (dataType instanceof VariantType) {
       return getVariant(ordinal);
+    } else if (dataType instanceof UserDefinedType<?> udt) {
+      return get(ordinal, udt.sqlType());
     } else {
       throw new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_3155");
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index d51007e7d3365..eb63fa9208a82 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType._
 import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String}
+import org.apache.spark.unsafe.types.{BinaryView, UTF8String}
 import org.apache.spark.util.ArrayImplicits._
 import org.apache.spark.util.collection.Utils
 
@@ -378,8 +378,8 @@ object CatalystTypeConverters {
   }
 
   private class GeometryConverter(dataType: GeometryType)
-      extends CatalystTypeConverter[Any, org.apache.spark.sql.types.Geometry, GeometryVal] {
-    override def toCatalystImpl(scalaValue: Any): GeometryVal = scalaValue match {
+      extends CatalystTypeConverter[Any, org.apache.spark.sql.types.Geometry, BinaryView] {
+    override def toCatalystImpl(scalaValue: Any): BinaryView = scalaValue match {
       case g: org.apache.spark.sql.types.Geometry if SQLConf.get.geospatialEnabled =>
         STUtils.serializeGeomFromWKB(g, dataType)
       case other => throw new SparkIllegalArgumentException(
@@ -389,7 +389,7 @@ object CatalystTypeConverters {
           "otherClass" -> other.getClass.getCanonicalName,
           "dataType" -> StringType.sql))
     }
-    override def toScala(catalystValue: GeometryVal): org.apache.spark.sql.types.Geometry = {
+    override def toScala(catalystValue: BinaryView): org.apache.spark.sql.types.Geometry = {
       assertGeospatialEnabled()
       if (catalystValue == null) null
       else STUtils.deserializeGeom(catalystValue, dataType)
@@ -398,13 +398,13 @@ object CatalystTypeConverters {
     override def toScalaImpl(row: InternalRow, column: Int):
         org.apache.spark.sql.types.Geometry = {
       assertGeospatialEnabled()
-      STUtils.deserializeGeom(row.getGeometry(0), dataType)
+      STUtils.deserializeGeom(row.getBinaryView(0), dataType)
     }
   }
 
   private class GeographyConverter(dataType: GeographyType)
-      extends CatalystTypeConverter[Any, org.apache.spark.sql.types.Geography, GeographyVal] {
-    override def toCatalystImpl(scalaValue: Any): GeographyVal = scalaValue match {
+      extends CatalystTypeConverter[Any, org.apache.spark.sql.types.Geography, BinaryView] {
+    override def toCatalystImpl(scalaValue: Any): BinaryView = scalaValue match {
       case g: org.apache.spark.sql.types.Geography if SQLConf.get.geospatialEnabled =>
         STUtils.serializeGeogFromWKB(g, dataType)
       case other => throw new SparkIllegalArgumentException(
@@ -414,7 +414,7 @@ object CatalystTypeConverters {
           "otherClass" -> other.getClass.getCanonicalName,
           "dataType" -> StringType.sql))
     }
-    override def toScala(catalystValue: GeographyVal): org.apache.spark.sql.types.Geography = {
+    override def toScala(catalystValue: BinaryView): org.apache.spark.sql.types.Geography = {
       assertGeospatialEnabled()
       if (catalystValue == null) null
       else STUtils.deserializeGeog(catalystValue, dataType)
@@ -423,7 +423,7 @@ object CatalystTypeConverters {
     override def toScalaImpl(row: InternalRow, column: Int):
         org.apache.spark.sql.types.Geography = {
       assertGeospatialEnabled()
-      STUtils.deserializeGeog(row.getGeography(0), dataType)
+      STUtils.deserializeGeog(row.getBinaryView(0), dataType)
     }
   }
 
@@ -666,7 +666,7 @@ object CatalystTypeConverters {
     case r: Row => InternalRow(r.toSeq.map(convertToCatalyst): _*)
     case arr: Array[Byte] => arr
     case g: org.apache.spark.sql.types.Geometry => STUtils.stGeomFromWKB(g.getBytes, g.getSrid)
-    case g: org.apache.spark.sql.types.Geography => STUtils.stGeogFromWKB(g.getBytes)
+    case g: org.apache.spark.sql.types.Geography => STUtils.stGeogFromWKB(g.getBytes, g.getSrid)
     case arr: Array[Char] => StringConverter.toCatalyst(arr)
     case arr: Array[_] => new GenericArrayData(arr.map(convertToCatalyst))
     case map: Map[_, _] =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
index b27283cb3f647..cd6b3155eec45 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.types.ops.TypeOps
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.{BinaryView, CalendarInterval, UTF8String}
 import org.apache.spark.util.ArrayImplicits._
 
 /**
@@ -119,6 +119,7 @@ object InternalRow {
    */
   def copyValue(value: Any): Any = value match {
     case v: UTF8String => v.copy()
+    case v: BinaryView => v.copy()
     case v: InternalRow => v.copy()
     case v: ArrayData => v.copy()
     case v: MapData => v.copy()
@@ -190,6 +191,8 @@ object InternalRow {
     case udt: UserDefinedType[_] => getWriter(ordinal, udt.sqlType)
     case NullType => (input, _) => input.setNullAt(ordinal)
     case StringType => (input, v) => input.update(ordinal, v.asInstanceOf[UTF8String].copy())
+    case _: GeometryType | _: GeographyType =>
+      (input, v) => input.update(ordinal, v.asInstanceOf[BinaryView].copy())
     case _: StructType => (input, v) => input.update(ordinal, v.asInstanceOf[InternalRow].copy())
     case _: ArrayType => (input, v) => input.update(ordinal, v.asInstanceOf[ArrayData].copy())
     case _: MapType => (input, v) => input.update(ordinal, v.asInstanceOf[MapData].copy())
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala
index 0e451db6cfe25..622ef647591b2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ProjectingInternalRow.scala
@@ -93,12 +93,8 @@ case class ProjectingInternalRow(schema: StructType,
     row.getBinary(colOrdinals(ordinal))
   }
 
-  override def getGeography(ordinal: Int): GeographyVal = {
-    row.getGeography(colOrdinals(ordinal))
-  }
-
-  override def getGeometry(ordinal: Int): GeometryVal = {
-    row.getGeometry(colOrdinals(ordinal))
+  override def getBinaryView(ordinal: Int): BinaryView = {
+    row.getBinaryView(colOrdinals(ordinal))
   }
 
   override def getInterval(ordinal: Int): CalendarInterval = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0ba801e3d6b7b..651fd06c898a4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -25,10 +25,11 @@ import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Random, Success, Try}
 
-import org.apache.spark.{SparkException, SparkThrowable, SparkUnsupportedOperationException}
+import org.apache.spark.{SparkException, SparkUnsupportedOperationException}
 import org.apache.spark.internal.config.ConfigBindingPolicy
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst._
+import org.apache.spark.sql.catalyst.analysis.TableOutputResolver.DefaultValueFillMode._
 import org.apache.spark.sql.catalyst.analysis.resolver.{
   AnalyzerBridgeState,
   HybridAnalyzer,
@@ -50,7 +51,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{toPrettySQL, trimTempResolvedColumn, CharVarcharUtils}
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
-import org.apache.spark.sql.connector.catalog.{View => _, _}
+import org.apache.spark.sql.connector.catalog._
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 import org.apache.spark.sql.connector.catalog.TableChange.{After, ColumnPosition}
 import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
@@ -74,7 +75,7 @@ import org.apache.spark.util.ArrayImplicits._
  * functions.
  */
 object SimpleAnalyzer extends Analyzer(
-  new CatalogManager(
+  new DefaultCatalogManager(
     FakeV2SessionCatalog,
     new SessionCatalog(
       new InMemoryCatalog,
@@ -139,10 +140,15 @@ object FakeV2SessionCatalog extends TableCatalog with FunctionCatalog with Suppo
  *                              even if a temp view `t` has been created.
  * @param outerPlan The query plan from the outer query that can be used to resolve star
  *                  expressions in a subquery.
+ * @param resolutionPathEntries When resolving a view or SQL function body, the ordered frozen
+ *                              path for unqualified relation/function names (if persisted in
+ *                              metadata). Outside views/functions, compute from session
+ *                              [[CatalogManager.sqlResolutionPathEntries]].
  */
 case class AnalysisContext(
     isDefault: Boolean = false,
     catalogAndNamespace: Seq[String] = Nil,
+    resolutionPathEntries: Option[Seq[Seq[String]]] = None,
     nestedViewDepth: Int = 0,
     maxNestedViewDepth: Int = -1,
     relationCache: mutable.Map[(Seq[String], Option[TimeTravelSpec]), LogicalPlan] =
@@ -198,7 +204,6 @@ object AnalysisContext {
   def withAnalysisContext[A](viewDesc: CatalogTable)(f: => A): A = {
     val originContext = value.get()
     val maxNestedViewDepth = if (originContext.maxNestedViewDepth == -1) {
-      // Here we start to resolve views, get `maxNestedViewDepth` from configs.
       SQLConf.get.maxNestedViewDepth
     } else {
       originContext.maxNestedViewDepth
@@ -206,6 +211,9 @@ object AnalysisContext {
     val context = AnalysisContext(
       isDefault = false,
       catalogAndNamespace = viewDesc.viewCatalogAndNamespace,
+      resolutionPathEntries = viewDesc.viewStoredResolutionPath
+        .map(CatalogManager.deserializePathEntriesOrFail(
+          _, "view", viewDesc.identifier.unquotedString)),
       nestedViewDepth = originContext.nestedViewDepth + 1,
       maxNestedViewDepth = maxNestedViewDepth,
       relationCache = originContext.relationCache,
@@ -219,7 +227,12 @@ object AnalysisContext {
 
   def withAnalysisContext[A](function: SQLFunction)(f: => A): A = {
     val originContext = value.get()
-    val context = originContext.copy(collation = function.collation)
+    // Function body analysis should not inherit any caller-pinned path; use only function metadata.
+    val context = originContext.copy(
+      resolutionPathEntries = function.functionStoredResolutionPath
+        .map(CatalogManager.deserializePathEntriesOrFail(
+          _, "SQL function", function.name.unquotedString)),
+      collation = function.collation)
     set(context)
     try f finally { set(originContext) }
   }
@@ -290,12 +303,14 @@ object Analyzer {
  */
 class Analyzer(
     override val catalogManager: CatalogManager,
-    private[sql] val sharedRelationCache: RelationCache = RelationCache.empty)
+    private[sql] val sharedRelationCache: RelationCache = RelationCache.empty,
+    private[sql] val sessionConf: Option[SQLConf] = None)
   extends RuleExecutor[LogicalPlan]
   with CheckAnalysis with AliasHelper with SQLConfHelper with ColumnResolutionHelper {
 
   private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog
-  private val relationResolution = new RelationResolution(catalogManager, sharedRelationCache)
+  private val relationResolution =
+    new RelationResolution(catalogManager, sharedRelationCache)
   private val functionResolution = new FunctionResolution(catalogManager, relationResolution)
 
   override protected def validatePlanChanges(
@@ -308,7 +323,7 @@ class Analyzer(
 
   // Only for tests.
   def this(catalog: SessionCatalog) = {
-    this(new CatalogManager(FakeV2SessionCatalog, catalog))
+    this(new DefaultCatalogManager(FakeV2SessionCatalog, catalog))
   }
 
   def getRelationResolution: RelationResolution = relationResolution
@@ -317,11 +332,13 @@ class Analyzer(
     if (plan.analyzed) {
       plan
     } else {
+      def runAnalysis(): LogicalPlan = HybridAnalyzer.fromLegacyAnalyzer(
+        legacyAnalyzer = this, tracker = tracker).apply(plan)
       if (AnalysisContext.get.isDefault) {
         AnalysisContext.reset()
         try {
           AnalysisHelper.markInAnalyzer {
-            HybridAnalyzer.fromLegacyAnalyzer(legacyAnalyzer = this, tracker = tracker).apply(plan)
+            runWithSessionConf(runAnalysis())
           }
         } finally {
           AnalysisContext.reset()
@@ -329,20 +346,64 @@ class Analyzer(
       } else {
         AnalysisContext.withNewAnalysisContext {
           AnalysisHelper.markInAnalyzer {
-            HybridAnalyzer.fromLegacyAnalyzer(legacyAnalyzer = this, tracker = tracker).apply(plan)
+            runWithSessionConf(runAnalysis())
           }
         }
       }
     }
   }
 
+  /**
+   * Runs `thunk` under the analyzer's [[sessionConf]] for analyzer isolation, but yields to any
+   * outer [[SQLConf.withExistingConf]] scope (e.g. a SQL UDF / view body that pinned the
+   * creation-time configs). Falls through unchanged when [[sessionConf]] is unset, or when the
+   * outer scope already installed a different conf -- otherwise the outer scope's conf would be
+   * silently clobbered.
+   */
+  private def runWithSessionConf[T](thunk: => T): T = sessionConf match {
+    case None => thunk
+    case Some(c) =>
+      SQLConf.getExistingConfIfSet match {
+        case Some(outer) if outer ne c => thunk
+        case _ => SQLConf.withExistingConf(c) { thunk }
+      }
+  }
+
+  /**
+   * Returns a copy of this analyzer that uses the given [[CatalogManager]] for all catalog
+   * lookups. All other configuration (extended rules, checks, etc.) is preserved. Used by
+   * [[QueryExecution]] to create a per-query analyzer for transactional operations for
+   * transaction-aware catalog resolution.
+   *
+   * IMPORTANT: any new extension point added to Analyzer must also be copied here, otherwise
+   * transaction-aware analyzer clones (created by QueryExecution) will silently miss those rules.
+   */
+  def withCatalogManager(newCatalogManager: CatalogManager): Analyzer = {
+    val self = this
+    new Analyzer(newCatalogManager, sharedRelationCache) {
+      override val hintResolutionRules: Seq[Rule[LogicalPlan]] = self.hintResolutionRules
+      override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = self.extendedResolutionRules
+      override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = self.postHocResolutionRules
+      override val extendedCheckRules: Seq[LogicalPlan => Unit] = self.extendedCheckRules
+      override val singlePassResolverExtensions: Seq[ResolverExtension] =
+        self.singlePassResolverExtensions
+      override val singlePassMetadataResolverExtensions: Seq[ResolverExtension] =
+        self.singlePassMetadataResolverExtensions
+      override val singlePassPostHocResolutionRules: Seq[Rule[LogicalPlan]] =
+        self.singlePassPostHocResolutionRules
+      override val singlePassExtendedResolutionChecks: Seq[LogicalPlan => Unit] =
+        self.singlePassExtendedResolutionChecks
+    }
+  }
+
   override def execute(plan: LogicalPlan): LogicalPlan = {
     AnalysisContext.withNewAnalysisContext {
       executeSameContext(plan)
     }
   }
 
-  private def executeSameContext(plan: LogicalPlan): LogicalPlan = super.execute(plan)
+  private def executeSameContext(plan: LogicalPlan): LogicalPlan =
+    runWithSessionConf(super.execute(plan))
 
   def resolver: Resolver = conf.resolver
 
@@ -437,7 +498,9 @@ class Analyzer(
     Batch("Simple Sanity Check", Once,
       LookupFunctions),
     Batch("Keep Legacy Outputs", Once,
-      KeepLegacyOutputs)
+      KeepLegacyOutputs),
+    Batch("Unresolve Relations", Once,
+      new UnresolveRelationsInTransaction(catalogManager))
   )
 
   override def batches: Seq[Batch] = earlyBatches ++ Seq(
@@ -445,6 +508,7 @@ class Analyzer(
       new ResolveCatalogs(catalogManager) ::
       ResolveInsertInto ::
       ResolveRelations ::
+      ResolveChangelogTable ::
       ResolvePartitionSpec ::
       ResolveFieldNameAndPosition ::
       AddMetadataColumns ::
@@ -981,7 +1045,8 @@ class Analyzer(
     // This is done by keeping the catalog and namespace in `AnalysisContext`, and analyzer will
     // look at `AnalysisContext.catalogAndNamespace` when resolving relations with single-part name.
     // If `AnalysisContext.catalogAndNamespace` is non-empty, analyzer will expand single-part names
-    // with it, instead of current catalog and namespace.
+    // with it, instead of current catalog and namespace. For views/functions with persisted frozen
+    // PATH, `AnalysisContext.resolutionPathEntries` drives unqualified relation lookup order.
     private def resolveViews(
         plan: LogicalPlan,
         options: CaseInsensitiveStringMap): LogicalPlan = plan match {
@@ -992,7 +1057,7 @@ class Analyzer(
       // DataSourceV2Relation on each view access. Only dataframe temp view may contain it
       // as it stores resolved plans directly.
       case view: View if view.isTempViewStoringAnalyzedPlan =>
-        view.copy(child = resolveTableReferences(view.child))
+        view.copy(child = resolveTableReferencesInTempView(view.child))
       case p @ SubqueryAlias(_, view: View) =>
         p.copy(child = resolveViews(view, options))
       case _ => plan
@@ -1001,17 +1066,18 @@ class Analyzer(
     // Unwrap temp views storing analyzed plans and resolve V2TableReference nodes in the child.
     private def unwrapRelationPlan(plan: LogicalPlan): LogicalPlan = {
       EliminateSubqueryAliases(plan) match {
-        case v: View if v.isTempViewStoringAnalyzedPlan => resolveTableReferences(v.child)
+        case v: View if v.isTempViewStoringAnalyzedPlan => resolveTableReferencesInTempView(v.child)
         case other => other
       }
     }
 
-    // Resolve V2TableReference nodes in a plan. V2TableReference is only created for temp views
-    // (via V2TableReference.createForTempView), so we only need to resolve it when returning
+    // Resolve V2TableReference nodes inside temp view plans. These are created by
+    // V2TableReference.createForTempView. We only need to resolve it when returning
     // the plan of temp views (in resolveViews and unwrapRelationPlan).
-    private def resolveTableReferences(plan: LogicalPlan): LogicalPlan = {
+    private def resolveTableReferencesInTempView(plan: LogicalPlan): LogicalPlan = {
       plan.resolveOperatorsUp {
-        case r: V2TableReference => relationResolution.resolveReference(r)
+        case r: V2TableReference if r.context.isInstanceOf[V2TableReference.TemporaryViewContext] =>
+          relationResolution.resolveReference(r)
       }
     }
 
@@ -1034,7 +1100,18 @@ class Analyzer(
           case other => i.copy(table = other)
         }
 
-      // TODO (SPARK-27484): handle streaming write commands when we have them.
+      case write: V2StreamingWriteCommand =>
+        write.table match {
+          case ref: V2TableReference =>
+            relationResolution.resolveReference(ref) match {
+              case r: NamedRelation => write.withNewTable(r)
+              case other => throw SparkException.internalError(
+                s"Expected V2TableReference write target to resolve to a NamedRelation, " +
+                  s"but got ${other.getClass.getName}")
+            }
+          case _ => write
+        }
+
       case write: V2WriteCommand =>
         write.table match {
           case u: UnresolvedRelation if !u.isStreaming =>
@@ -1063,8 +1140,8 @@ class Analyzer(
         val timeTravelSpec = TimeTravelSpec.create(timestamp, version, conf.sessionLocalTimeZone)
         resolveRelation(u, timeTravelSpec).getOrElse(r)
 
-      case r @ RelationChanges(u: UnresolvedRelation, changelogInfo) =>
-        relationResolution.resolveChangelog(u, changelogInfo).getOrElse(r)
+      case r @ RelationChanges(u: UnresolvedRelation, ctx) =>
+        relationResolution.resolveChangelog(u, ctx).getOrElse(r)
 
       case u @ UnresolvedTable(identifier, cmd, suggestAlternative) =>
         lookupTableOrView(identifier).map {
@@ -1090,7 +1167,7 @@ class Analyzer(
           case other => other
         }.getOrElse(u)
 
-      case u @ UnresolvedTableOrView(identifier, cmd, allowTempView) =>
+      case u @ UnresolvedTableOrView(identifier, cmd, allowTempView, _) =>
         lookupTableOrView(identifier).map {
           case _: ResolvedTempView if !allowTempView =>
             throw QueryCompilationErrors.expectPermanentViewNotTempViewError(
@@ -1101,7 +1178,19 @@ class Analyzer(
 
     /**
      * Resolves relations to `ResolvedTable` or `Resolved[Temp/Persistent]View`. This is
-     * for resolving DDL and misc commands.
+     * for resolving DDL and misc commands. UnresolvedView callers reject non-view results
+     * downstream via `expectViewNotTableError`.
+     *
+     * When `viewOnly=true`, non-session catalogs that do not implement [[ViewCatalog]] are
+     * rejected up front with MISSING_CATALOG_ABILITY.VIEWS -- they cannot host views at all,
+     * so surfacing a downstream "view not found" would hide the real reason.
+     *
+     * Lookup order against a non-session catalog:
+     *   1. If the catalog is a [[TableViewCatalog]], [[TableViewCatalog.loadTableOrView]] is called
+     *      once. A returned [[MetadataTable]] wrapping a [[ViewInfo]] is interpreted as a
+     *      view; other results are tables.
+     *   2. Otherwise, [[TableCatalog.loadTable]] is tried (when implemented), then
+     *      [[ViewCatalog.loadView]] as the fallback view-resolution path (when implemented).
      */
     private def lookupTableOrView(
         identifier: Seq[String],
@@ -1111,18 +1200,58 @@ class Analyzer(
       }.orElse {
         relationResolution.expandIdentifier(identifier) match {
           case CatalogAndIdentifier(catalog, ident) =>
-            if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog)) {
-              throw QueryCompilationErrors.catalogOperationNotSupported(catalog, "views")
+            if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog) &&
+                !catalog.isInstanceOf[ViewCatalog]) {
+              throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog)
             }
-            CatalogV2Util.loadTable(catalog, ident).map {
-              case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) &&
-                v1Table.v1Table.tableType == CatalogTableType.VIEW =>
-                val v1Ident = v1Table.catalogTable.identifier
-                val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier)
-                ResolvedPersistentView(
-                  catalog, v2Ident, v1Table.catalogTable)
-              case table =>
-                ResolvedTable.create(catalog.asTableCatalog, ident, table)
+            catalog match {
+              case mc: TableViewCatalog =>
+                // Single-RPC perf path: loadTableOrView returns a Table for a table or a
+                // MetadataTable wrapping a ViewInfo for a view. NoSuchTable means
+                // neither exists.
+                try {
+                  Some(mc.loadTableOrView(ident) match {
+                    case t: MetadataTable if t.getTableInfo.isInstanceOf[ViewInfo] =>
+                      ResolvedPersistentView(
+                        catalog, ident, t.getTableInfo.asInstanceOf[ViewInfo])
+                    case table =>
+                      ResolvedTable.create(catalog.asTableCatalog, ident, table)
+                  })
+                } catch {
+                  case _: NoSuchTableException => None
+                }
+              case _ =>
+                // Skip the table-side lookup entirely for view-only catalogs (no
+                // `TableCatalog` mixin): `CatalogV2Util.loadTable` would call `asTableCatalog`
+                // and throw MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-
+                // resolution path.
+                val tableResolved: Option[LogicalPlan] = if (
+                  CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog]
+                ) {
+                  CatalogV2Util.loadTable(catalog, ident).map {
+                    case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) &&
+                      v1Table.v1Table.isViewLike =>
+                      val v1Ident = v1Table.catalogTable.identifier
+                      val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier)
+                      ResolvedPersistentView(
+                        catalog, v2Ident, new V1ViewInfo(v1Table.catalogTable))
+                    case table =>
+                      ResolvedTable.create(catalog.asTableCatalog, ident, table)
+                  }
+                } else {
+                  None
+                }
+                tableResolved.orElse {
+                  catalog match {
+                    case vc: ViewCatalog =>
+                      try {
+                        Some(ResolvedPersistentView(catalog, ident, vc.loadView(ident)))
+                      } catch {
+                        case _: NoSuchViewException => None
+                      }
+                    case _ => None
+                  }
+                }
             }
           case _ => None
         }
@@ -1411,6 +1540,37 @@ class Analyzer(
     }
 
     def doApply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
+      // `InsertIntoStatement.table` and `V2WriteCommand.table` are non-child `LogicalPlan`
+      // slots (`child = query`), so the default `resolveOperatorsUp` + `mapExpressions`
+      // traversal never resolves expressions placed inside them. For a
+      // `PlanWithUnresolvedIdentifier`, `identifierExpr` (e.g. an `UnresolvedAttribute`
+      // referring to a SQL variable in `INSERT INTO IDENTIFIER(target_table) ...`) must
+      // be resolved here before `ResolveIdentifierClause` can materialize the relation.
+      // Mirror the structural recursion into the non-child `.table` slot that
+      // `BindParameters` and `ResolveIdentifierClause` already do for the same shape
+      // (SPARK-46625); unlike those rules, this one performs attribute resolution rather
+      // than parameter binding or placeholder materialization. Resolve against `p` (whose
+      // `children` are `Nil` on the INSERT / `OverwriteByExpression` path built by
+      // `buildWriteTableSlot`) so the IDENTIFIER expression cannot see query output
+      // columns -- only the last-resort variable resolution path fires. The
+      // `!identifierExpr.resolved` guard makes the case idempotent under bottom-up
+      // traversal.
+      case i: InsertIntoStatement
+          if i.table.isInstanceOf[PlanWithUnresolvedIdentifier] &&
+             !i.table.asInstanceOf[PlanWithUnresolvedIdentifier].identifierExpr.resolved =>
+        val p = i.table.asInstanceOf[PlanWithUnresolvedIdentifier]
+        val resolvedExpr = resolveExpressionByPlanChildren(
+          p.identifierExpr, p, includeLastResort = true)
+        i.copy(table = p.copy(identifierExpr = resolvedExpr))
+
+      case w: V2WriteCommand
+          if w.table.isInstanceOf[PlanWithUnresolvedIdentifier] &&
+             !w.table.asInstanceOf[PlanWithUnresolvedIdentifier].identifierExpr.resolved =>
+        val p = w.table.asInstanceOf[PlanWithUnresolvedIdentifier]
+        val resolvedExpr = resolveExpressionByPlanChildren(
+          p.identifierExpr, p, includeLastResort = true)
+        w.withNewTable(p.copy(identifierExpr = resolvedExpr))
+
       // Don't wait other rules to resolve the child plans of `InsertIntoStatement` as we need
       // to resolve column "DEFAULT" in the child plans so that they must be unresolved.
       case i: InsertIntoStatement => resolveColumnDefaultInCommandInputQuery(i)
@@ -1581,7 +1741,7 @@ class Analyzer(
                   // These columns will be added by ResolveSchemaEvolution later.
                   sourceTable.output.map { sourceAttr =>
                     val key = findAttrInTarget(sourceAttr.name).getOrElse(
-                      UnresolvedAttribute(sourceAttr.name))
+                      UnresolvedAttribute.quoted(sourceAttr.name))
                     Assignment(key, sourceAttr)
                   }
                 } else {
@@ -1617,7 +1777,7 @@ class Analyzer(
                   // These columns will be added by ResolveSchemaEvolution later.
                   sourceTable.output.map { sourceAttr =>
                     val key = findAttrInTarget(sourceAttr.name).getOrElse(
-                      UnresolvedAttribute(sourceAttr.name))
+                      UnresolvedAttribute.quoted(sourceAttr.name))
                     Assignment(key, sourceAttr)
                   }
                 } else {
@@ -1810,14 +1970,27 @@ class Analyzer(
       child: LogicalPlan): Seq[NamedExpression] = {
       exprs.flatMap {
         // Using Dataframe/Dataset API: testData2.groupBy($"a", $"b").agg($"*")
-        case s: Star => expand(s, child)
+        case s: Star => expand(s, child).map(aliasIfOuterReference)
         // Using SQL API without running ResolveAlias: SELECT * FROM testData2 group by a, b
-        case UnresolvedAlias(s: Star, _) => expand(s, child)
+        case UnresolvedAlias(s: Star, _) => expand(s, child).map(aliasIfOuterReference)
         case o if containsStar(o :: Nil) => expandStarExpression(o, child) :: Nil
         case o => o :: Nil
       }.map(_.asInstanceOf[NamedExpression])
     }
 
+    /**
+     * Wrap an outer-scope star expansion result in [[Alias]] so that the [[OuterReference]]
+     * attribute gets a fresh ExprId in the subquery's scope. This prevents the outer ExprId from
+     * leaking through [[Project.output]] when the expansion goes through a derived table.
+     * Struct star expansion already produces [[Alias]] nodes, so those are left unchanged.
+     */
+    private def aliasIfOuterReference(e: NamedExpression): NamedExpression = e match {
+      case _: Alias => e
+      case outerReference: OuterReference =>
+        Alias(outerReference, toPrettySQL(outerReference.e))()
+      case _ => e
+    }
+
     /**
      * Returns true if `exprs` contains a [[Star]].
      */
@@ -1838,14 +2011,15 @@ class Analyzer(
      * This is used for special syntax transformations (e.g., COUNT(*) -> COUNT(1)) that
      * should only apply to builtin functions, not to user-defined functions.
      *
-     * In legacy mode (sessionOrder="first"), temp functions shadow builtins, so an
-     * unqualified name that matches a temp function should NOT be treated as builtin.
+     * When the effective SQL PATH puts `system.session` before `system.builtin`, temp
+     * functions shadow builtins, so an unqualified name that matches a temp function
+     * should NOT be treated as builtin.
      */
     private def matchesFunctionName(nameParts: Seq[String], expectedName: String): Boolean = {
       if (!FunctionResolution.isUnqualifiedOrBuiltinFunctionName(nameParts, expectedName)) {
         return false
       }
-      if (nameParts.size == 1 && conf.sessionFunctionResolutionOrder == "first") {
+      if (nameParts.size == 1 && functionResolution.isSessionBeforeBuiltinInPath) {
         val v1Catalog = catalogManager.v1SessionCatalog
         !v1Catalog.isTemporaryFunction(FunctionIdentifier(nameParts.head))
       } else {
@@ -2078,10 +2252,8 @@ class Analyzer(
                   throw QueryCompilationErrors.notAScalarFunctionError(nameParts.mkString("."), f)
 
                 case FunctionType.NotFound =>
-                  val catalogPath =
-                    catalogManager.currentCatalog.name +: catalogManager.currentNamespace
-                  val searchPath = SQLConf.get.resolutionSearchPath(catalogPath.toSeq)
-                    .map(_.quoted)
+                  val searchPath =
+                    functionResolution.sqlResolutionPathEntriesForAnalysis.map(_.quoted)
                   throw QueryCompilationErrors.unresolvedRoutineError(
                     nameParts,
                     searchPath,
@@ -2278,20 +2450,8 @@ class Analyzer(
   object ResolveProcedures extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning(
       _.containsPattern(UNRESOLVED_PROCEDURE), ruleId) {
-      case UnresolvedProcedure(CatalogAndIdentifier(catalog, ident)) =>
-        val procedureCatalog = catalog.asProcedureCatalog
-        val procedure = load(procedureCatalog, ident)
-        ResolvedProcedure(procedureCatalog, ident, procedure)
-    }
-
-    private def load(catalog: ProcedureCatalog, ident: Identifier): UnboundProcedure = {
-      try {
-        catalog.loadProcedure(ident)
-      } catch {
-        case e: Exception if !e.isInstanceOf[SparkThrowable] =>
-          val nameParts = catalog.name +: ident.asMultipartIdentifier
-          throw QueryCompilationErrors.failedToLoadRoutineError(nameParts, e)
-      }
+      case u: UnresolvedProcedure =>
+        functionResolution.resolveProcedure(u)
     }
   }
 
@@ -2392,6 +2552,15 @@ class Analyzer(
         case a: FunctionTableSubqueryArgumentExpression if !a.plan.resolved =>
           resolveSubQuery(a, outer)(
             (plan, outerAttrs) => a.copy(plan = plan, outerAttrs = outerAttrs))
+        // The subquery's plan is already resolved. Replace any V2TableReferences without
+        // re-running any analyzer rules.
+        case se: SubqueryExpression
+            if se.plan.resolved &&
+               se.plan.collectFirstWithSubqueries { case _: V2TableReference => () }.isDefined =>
+          val newPlan = se.plan.transformWithSubqueries {
+            case r: V2TableReference => relationResolution.resolveReference(r)
+          }
+          se.withNewPlan(newPlan)
       }
     }
 
@@ -2525,18 +2694,23 @@ class Analyzer(
           // unresolved.
           !f.inputs.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) =>
           withPosition(f) {
-            val plan = resolve(f)
-            // Extract the function input project list from the SQL function plan and
-            // inline the SQL function expression.
-            plan match {
-              case Project(body :: Nil, Project(aliases, _: OneRowRelation)) =>
-                val inputs = aliases.map(stripOuterReference)
-                projectList ++= inputs
-                SQLScalarFunction(f.function, inputs.map(_.toAttribute), body)
-              case o =>
-                throw new AnalysisException(
-                  errorClass = "INVALID_SQL_FUNCTION_PLAN_STRUCTURE",
-                  messageParameters = Map("plan" -> o.toString))
+            // Set CurrentOrigin to the SQL function call site so that input-binding
+            // Casts constructed inside makeSQLFunctionPlan capture the call-site
+            // position in their queryContext snapshot (see Cast.initQueryContext).
+            withOrigin(f.origin) {
+              val plan = resolve(f)
+              // Extract the function input project list from the SQL function plan and
+              // inline the SQL function expression.
+              plan match {
+                case Project(body :: Nil, Project(aliases, _: OneRowRelation)) =>
+                  val inputs = aliases.map(stripOuterReference)
+                  projectList ++= inputs
+                  SQLScalarFunction(f.function, inputs.map(_.toAttribute), body)
+                case o =>
+                  throw new AnalysisException(
+                    errorClass = "INVALID_SQL_FUNCTION_PLAN_STRUCTURE",
+                    messageParameters = Map("plan" -> o.toString))
+              }
             }
           }
         case o => o.mapChildren(rewriteSQLFunctions(_, projectList))
@@ -3196,7 +3370,15 @@ class Analyzer(
           throw QueryCompilationErrors.nestedGeneratorError(g.generator)
         }
         g.copy(generatorOutput =
-          GeneratorResolution.makeGeneratorOutput(g.generator, g.generatorOutput.map(_.name)))
+          GeneratorResolution.makeGeneratorOutput(
+            g.generator, g.generatorOutput.map {
+              case ua: UnresolvedAttribute =>
+                // LATERAL VIEW parser always emits single-part names via
+                // UnresolvedAttribute.quoted; assert to fail loudly if that ever changes.
+                assert(ua.nameParts.length == 1, s"unexpected multi-part name: ${ua.nameParts}")
+                ua.nameParts.head
+              case a => a.name
+            }))
       }
     }
   }
@@ -3659,9 +3841,16 @@ class Analyzer(
         validateStoreAssignmentPolicy()
         TableOutputResolver.suitableForByNameCheck(v2Write.isByName,
           expected = v2Write.table.output, queryOutput = v2Write.query.output)
+        // With schema evolution + coercion flag, missing top-level columns AND missing nested
+        // struct fields are filled with defaults/null (RECURSE mode). Otherwise, only missing
+        // top-level columns are filled via FILL mode; missing nested struct fields still cause
+        // schema enforcement errors.
+        val defaultValueFillMode =
+          if (conf.coerceInsertNestedTypes && v2Write.schemaEvolutionEnabled) RECURSE
+          else FILL
         val projection = TableOutputResolver.resolveOutputColumns(
           v2Write.table.name, v2Write.table.output, v2Write.query, v2Write.isByName, conf,
-          supportColDefaultValue = true)
+          defaultValueFillMode)
         if (projection != v2Write.query) {
           val cleanedTable = v2Write.table match {
             case r: DataSourceV2Relation =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index e23e7561f0e36..23c416dd4b383 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -192,8 +192,8 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       // Ideally the implicit cast rule should be the same as `Cast.canANSIStoreAssign` so that it's
       // consistent with table insertion. To avoid breaking too many existing Spark SQL queries,
       // we make the system to allow implicitly converting String type as other primitive types.
-      case (_: StringType, a @ (_: AtomicType | NumericType | DecimalType | AnyTimestampType)) =>
-        Some(a.defaultConcreteType)
+      case (_: StringType, a @ (_: AtomicType | NumericType | DecimalType | AnyTimestampType |
+          AnyTimeType)) => Some(a.defaultConcreteType)
 
       case (ArrayType(fromType, _), AbstractArrayType(toType)) =>
         implicitCast(fromType, toType).map(ArrayType(_, true))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
index 67d5b70b30a33..ca9418a0c6eff 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala
@@ -134,7 +134,7 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] {
       case alterViewAs: AlterViewAs =>
         alterViewAs.child match {
           case resolvedPersistentView: ResolvedPersistentView =>
-            resolvedPersistentView.metadata.collation
+            Option(resolvedPersistentView.info.properties.get(TableCatalog.PROP_COLLATION))
           case resolvedTempView: ResolvedTempView =>
             resolvedTempView.metadata.collation
           case _ => None
@@ -197,7 +197,7 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] {
             collation = getCollationFromSchemaMetadata(catalog, identifier.namespace())))
 
         case createView@CreateView(ResolvedIdentifier(
-        catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _)
+        catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _, _, _)
           if createView.collation.isEmpty =>
           val newCreateView = CurrentOrigin.withOrigin(createView.origin) {
             createView.copy(
@@ -206,20 +206,6 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] {
           newCreateView.copyTagsFrom(createView)
           newCreateView
 
-        // We match against ResolvedPersistentView because temporary views don't have a
-        // schema/catalog.
-        case alterViewAs@AlterViewAs(resolvedPersistentView@ResolvedPersistentView(
-        catalog: SupportsNamespaces, identifier, _), _, _)
-          if resolvedPersistentView.metadata.collation.isEmpty =>
-          val newResolvedPersistentView = resolvedPersistentView.copy(
-            metadata = resolvedPersistentView.metadata.copy(
-              collation = getCollationFromSchemaMetadata(catalog, identifier.namespace())))
-          val newAlterViewAs = CurrentOrigin.withOrigin(alterViewAs.origin) {
-            alterViewAs.copy(child = newResolvedPersistentView)
-          }
-          newAlterViewAs.copyTagsFrom(alterViewAs)
-          newAlterViewAs
-
         case createUserDefinedFunction@CreateUserDefinedFunction(
         ResolvedIdentifier(catalog: SupportsNamespaces, identifier),
         _, _, _, _, _, collation, _, _, _, _, _, _) if collation.isEmpty =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CdcNetChangesStatefulProcessor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CdcNetChangesStatefulProcessor.scala
new file mode 100644
index 0000000000000..4350d07c5b43c
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CdcNetChangesStatefulProcessor.scala
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.{Encoder, Row}
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
+import org.apache.spark.sql.catalyst.util.TypeUtils
+import org.apache.spark.sql.connector.catalog.Changelog
+import org.apache.spark.sql.streaming._
+import org.apache.spark.sql.types.StructType
+
+/**
+ * StatefulProcessor that incrementalises CDC net-change computation for streaming reads.
+ *
+ * The batch path (`ResolveChangelogTable.injectNetChangeComputation`) uses a Catalyst
+ * `Window` partitioned by `rowId` and ordered by `(_commit_version, change_type_rank)` to
+ * extract the first and last events per row identity, then applies the SPIP collapse
+ * matrix on `(existedBefore, existsAfter)`. That `Window` is rejected on streaming
+ * queries (`NON_TIME_WINDOW_NOT_SUPPORTED_IN_STREAMING`).
+ *
+ * This processor reuses the same SPIP collapse matrix with `transformWithState`, applied
+ * per watermark window rather than over the full requested version range. Per-row-identity
+ * state stores the first event ever observed and the most-recent event observed; an event
+ * time timer keyed on `_commit_timestamp` advances with each batch and fires once the
+ * global watermark passes the latest event time observed for the key, at which point the
+ * SPIP matrix is evaluated and the net result is emitted. See the paragraph below for how
+ * the per-window collapse differs from batch netChanges' range-scoped collapse.
+ *
+ * Output schema: identical to the connector's changelog schema.
+ *
+ * Streaming netChanges is incremental: per-row-identity state is cleared once its current
+ * net result is emitted (timer fire or end-of-stream flush). Subsequent commits on the same
+ * identity arrive against empty state and produce additional output rows independently. This
+ * differs from batch netChanges, which collapses every change for a row identity across the
+ * entire requested version range; the streaming path cannot retract previously emitted output
+ * to match that range-scoped collapse. For example, with id=1 inserted at v1 and deleted at
+ * v3 and an unrelated commit at v2 in between, batch netChanges over [v1..v3] emits nothing
+ * for id=1, while streaming emits an `insert` (after v2 advances the watermark past v1) and
+ * later a `delete` (after end-of-stream or another commit advances the watermark past v3).
+ *
+ * End-of-stream flushes all pending timers, so a bounded stream's output matches a batch
+ * netChanges only when no row identity is touched again after its first emission.
+ *
+ * @param inputSchema    schema of the rows fed into this processor; the connector's
+ *                       changelog schema (data columns + `_change_type` +
+ *                       `_commit_version` + `_commit_timestamp`) optionally extended with
+ *                       rowId helper columns added by
+ *                       [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]].
+ * @param computeUpdates whether `(existedBefore, existsAfter) = (true, true)` should be
+ *                       relabeled as `update_preimage` / `update_postimage` (true) or kept
+ *                       as `delete` / `insert` (false), matching the batch contract.
+ */
+private[analysis] class CdcNetChangesStatefulProcessor(
+    inputSchema: StructType,
+    computeUpdates: Boolean)
+  extends StatefulProcessor[Row, Row, Row] {
+
+  @transient private var firstEvent: ValueState[Row] = _
+  @transient private var lastEvent: ValueState[Row] = _
+
+  // Hoisted out of `relabel` so we don't pay a linear `fieldIndex` scan per emitted row.
+  private val changeTypeIdx: Int = inputSchema.fieldIndex("_change_type")
+  private val commitVersionIdx: Int = inputSchema.fieldIndex("_commit_version")
+
+  // `_commit_version` is connector-defined and is restricted to LongType or StringType
+  // (validated in `ChangelogTable.validateSchema`). We still route through Catalyst's
+  // type-aware interpreted ordering for symmetry with the batch path's `SortOrder` on
+  // the same attribute.
+  private val versionDataType = inputSchema(commitVersionIdx).dataType
+  private val versionToCatalyst: Any => Any =
+    CatalystTypeConverters.createToCatalystConverter(versionDataType)
+  private val versionInternalOrdering: Ordering[Any] =
+    TypeUtils.getInterpretedOrdering(versionDataType)
+  private val versionOrdering: Ordering[Row] = new Ordering[Row] {
+    override def compare(a: Row, b: Row): Int = {
+      val av = versionToCatalyst(a.get(commitVersionIdx))
+      val bv = versionToCatalyst(b.get(commitVersionIdx))
+      versionInternalOrdering.compare(av, bv)
+    }
+  }
+
+  override def init(outputMode: OutputMode, timeMode: TimeMode): Unit = {
+    val handle = getHandle
+    val rowEncoder: Encoder[Row] = ExpressionEncoder(inputSchema)
+    firstEvent = handle.getValueState[Row]("firstEvent", rowEncoder, TTLConfig.NONE)
+    lastEvent = handle.getValueState[Row]("lastEvent", rowEncoder, TTLConfig.NONE)
+  }
+
+  override def handleInputRows(
+      key: Row,
+      inputRows: Iterator[Row],
+      timerValues: TimerValues): Iterator[Row] = {
+    val handle = getHandle
+    // Sort by (_commit_version, change_type rank) -- pre-events (delete /
+    // update_preimage) before post-events (insert / update_postimage) within a single
+    // commit version, matching the batch path's `(_commit_version, change_type_rank)`
+    // ordering. We compose the type-aware Catalyst version ordering with the rank
+    // ordering as a tiebreaker.
+    val sorted = inputRows.toSeq.sorted(versionOrdering.orElse(Ordering.by { row =>
+      row.getAs[String](changeTypeIdx) match {
+        case Changelog.CHANGE_TYPE_UPDATE_PREIMAGE | Changelog.CHANGE_TYPE_DELETE => 0
+        case Changelog.CHANGE_TYPE_INSERT | Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE => 1
+        case _ => throw new SparkException(
+          errorClass = "CHANGELOG_CONTRACT_VIOLATION.UNEXPECTED_CHANGE_TYPE",
+          messageParameters = Map.empty,
+          cause = null)
+      }
+    }))
+    if (sorted.isEmpty) return Iterator.empty
+
+    if (!firstEvent.exists()) {
+      firstEvent.update(sorted.head)
+    }
+    lastEvent.update(sorted.last)
+
+    // Re-arm the per-key event-time timer to the latest observed `_commit_timestamp`.
+    // Without dropping any existing timers we'd risk an earlier timer firing first and
+    // emitting state that later events would then re-populate, producing duplicate
+    // output for the same row identity.
+    //
+    // A NULL `_commit_timestamp` cannot be turned into a timer epoch and would NPE on
+    // `getTime()`. The `Changelog` Javadoc requires non-NULL `_commit_timestamp` on
+    // streaming reads engaging post-processing, so we surface the contract violation
+    // with a clear error class rather than failing the micro-batch with an opaque NPE.
+    val ts = sorted.last.getAs[java.sql.Timestamp]("_commit_timestamp")
+    if (ts == null) {
+      throw new SparkException(
+        errorClass = "CHANGELOG_CONTRACT_VIOLATION.NULL_COMMIT_TIMESTAMP",
+        messageParameters = Map.empty,
+        cause = null)
+    }
+    val newTimerMs = ts.getTime
+    val existing = handle.listTimers().toList
+    existing.foreach(handle.deleteTimer)
+    handle.registerTimer(newTimerMs)
+
+    Iterator.empty
+  }
+
+  override def handleExpiredTimer(
+      key: Row,
+      timerValues: TimerValues,
+      expiredTimerInfo: ExpiredTimerInfo): Iterator[Row] = {
+    if (!firstEvent.exists()) return Iterator.empty
+
+    val first = firstEvent.get()
+    val last = lastEvent.get()
+    val firstChangeType = first.getAs[String]("_change_type")
+    val lastChangeType = last.getAs[String]("_change_type")
+
+    val existedBefore =
+      firstChangeType == Changelog.CHANGE_TYPE_DELETE ||
+        firstChangeType == Changelog.CHANGE_TYPE_UPDATE_PREIMAGE
+    val existsAfter =
+      lastChangeType == Changelog.CHANGE_TYPE_INSERT ||
+        lastChangeType == Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE
+
+    val (preLabel, postLabel) =
+      if (computeUpdates) {
+        (Changelog.CHANGE_TYPE_UPDATE_PREIMAGE, Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE)
+      } else {
+        (Changelog.CHANGE_TYPE_DELETE, Changelog.CHANGE_TYPE_INSERT)
+      }
+
+    val out: Iterator[Row] = (existedBefore, existsAfter) match {
+      case (false, false) => Iterator.empty
+      case (false, true) => Iterator(relabel(last, Changelog.CHANGE_TYPE_INSERT))
+      case (true, false) => Iterator(relabel(first, Changelog.CHANGE_TYPE_DELETE))
+      case (true, true) => Iterator(relabel(first, preLabel), relabel(last, postLabel))
+    }
+
+    firstEvent.clear()
+    lastEvent.clear()
+    out
+  }
+
+  private def relabel(row: Row, newChangeType: String): Row = {
+    val values = row.toSeq.toArray
+    values(changeTypeIdx) = newChangeType
+    new GenericRowWithSchema(values, inputSchema)
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ChangelogInfoUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ChangelogContextUtils.scala
similarity index 77%
rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ChangelogInfoUtils.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ChangelogContextUtils.scala
index fb7ae01843d6d..bdef991edceca 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ChangelogInfoUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ChangelogContextUtils.scala
@@ -18,19 +18,21 @@
 package org.apache.spark.sql.catalyst.analysis
 
 import java.lang.{Long => JLong}
-import java.util.{Locale, Optional => JOptional}
+import java.util.{Optional => JOptional}
 
 import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
-import org.apache.spark.sql.connector.catalog.ChangelogInfo
+import org.apache.spark.sql.connector.catalog.ChangelogContext
+import org.apache.spark.sql.connector.catalog.ChangelogContext.DeduplicationMode
+import org.apache.spark.sql.connector.catalog.ChangelogContext.DeduplicationMode.DROP_CARRYOVERS
 import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.types.TimestampType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 /**
- * Utility methods for constructing [[ChangelogInfo]] from DataFrame API options.
+ * Utility methods for constructing [[ChangelogContext]] from DataFrame API options.
  */
-object ChangelogInfoUtils {
+object ChangelogContextUtils {
 
   private val STARTING_VERSION = "startingVersion"
   private val ENDING_VERSION = "endingVersion"
@@ -42,12 +44,12 @@ object ChangelogInfoUtils {
   private val COMPUTE_UPDATES = "computeUpdates"
 
   /**
-   * Build a [[ChangelogInfo]] from the options specified via `.option()` calls on
+   * Build a [[ChangelogContext]] from the options specified via `.option()` calls on
    * `DataFrameReader` or `DataStreamReader`.
    */
   def fromOptions(
       options: CaseInsensitiveStringMap,
-      sessionLocalTimeZone: String): ChangelogInfo = {
+      sessionLocalTimeZone: String): ChangelogContext = {
     val startVersion = Option(options.get(STARTING_VERSION))
     val endVersion = Option(options.get(ENDING_VERSION))
     val startTimestamp = Option(options.get(STARTING_TIMESTAMP))
@@ -56,15 +58,7 @@ object ChangelogInfoUtils {
     val startInclusive = options.getBoolean(STARTING_BOUND_INCLUSIVE, true)
     val endInclusive = options.getBoolean(ENDING_BOUND_INCLUSIVE, true)
 
-    val deduplicationModeStr = Option(options.get(DEDUPLICATION_MODE))
-      .getOrElse("dropCarryovers").toLowerCase(Locale.ROOT)
-    val deduplicationMode = deduplicationModeStr match {
-      case "none" => ChangelogInfo.DeduplicationMode.NONE
-      case "dropcarryovers" => ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS
-      case "netchanges" => ChangelogInfo.DeduplicationMode.NET_CHANGES
-      case other =>
-        throw QueryCompilationErrors.invalidCdcOptionInvalidDeduplicationMode(other)
-    }
+    val deduplicationMode = parseDeduplicationMode(options)
     val computeUpdates = options.getBoolean(COMPUTE_UPDATES, false)
 
     // Determine range from options
@@ -98,7 +92,21 @@ object ChangelogInfoUtils {
       new UnboundedRange()
     }
 
-    new ChangelogInfo(range, deduplicationMode, computeUpdates)
+    new ChangelogContext(range, deduplicationMode, computeUpdates)
+  }
+
+  def parseDeduplicationMode(options: CaseInsensitiveStringMap): DeduplicationMode = {
+    if (options.containsKey(DEDUPLICATION_MODE)) {
+      parseDeduplicationMode(options.get(DEDUPLICATION_MODE))
+    } else {
+      DROP_CARRYOVERS
+    }
+  }
+
+  private def parseDeduplicationMode(value: String): DeduplicationMode = {
+    DeduplicationMode.values()
+      .find(_.value.equalsIgnoreCase(value))
+      .getOrElse(throw QueryCompilationErrors.invalidCdcOptionInvalidDeduplicationMode(value))
   }
 
   private def parseTimestamp(timestampStr: String, sessionLocalTimeZone: String): Long = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 9d48955cbc71e..4e07280f94c9b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -16,8 +16,6 @@
  */
 package org.apache.spark.sql.catalyst.analysis
 
-import java.util.Locale
-
 import scala.collection.mutable
 
 import org.apache.spark.{SparkException, SparkThrowable}
@@ -45,6 +43,8 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
 
   protected def isView(nameParts: Seq[String]): Boolean
 
+  protected def conf: org.apache.spark.sql.internal.SQLConf
+
   import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 
   /**
@@ -73,20 +73,31 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
    * Contains system.session and the current catalog namespace only. Not from SQLConf.
    */
   private def ddlSearchPathForError(catalogPath: Seq[String]): Seq[String] = {
-    Seq(toSQLId(Seq("system", "session")), toSQLId(catalogPath))
+    val sessionPath = Seq(
+      CatalogManager.SYSTEM_CATALOG_NAME,
+      CatalogManager.SESSION_NAMESPACE)
+    Seq(toSQLId(sessionPath), toSQLId(catalogPath))
   }
 
   /**
-   * `SQLConf.resolutionSearchPath` entries formatted with [[toSQLId]] for TABLE_OR_VIEW_NOT_FOUND.
-   * Same ordering as relation resolution and routine resolution search paths.
+   * Formats [[CatalogManager.sqlResolutionPathEntries]] with [[toSQLId]]
+   * for TABLE_OR_VIEW_NOT_FOUND error messages.
    */
   private def fullSearchPathForError(catalogPath: Seq[String]): Seq[String] = {
-    SQLConf.get.resolutionSearchPath(catalogPath).map(toSQLId)
+    val catalog = catalogPath.head
+    val ns = catalogPath.tail.toSeq
+    catalogManager.sqlResolutionPathEntries(catalog, ns).map(toSQLId)
   }
 
-  /** Current catalog name and namespace as a path, used when computing search path for errors. */
-  private def catalogPathForError: Seq[String] = {
-    (currentCatalog.name +: catalogManager.currentNamespace).toSeq
+  /**
+   * Catalog + namespace path for error messages. When resolving inside a view body,
+   * uses the view's defining catalog/namespace from AnalysisContext so the error
+   * reflects where the view was trying to resolve.
+   */
+  protected final def catalogPathForError: Seq[String] = {
+    val ctx = AnalysisContext.get.catalogAndNamespace
+    if (ctx.nonEmpty) ctx
+    else (currentCatalog.name +: catalogManager.currentNamespace).toSeq
   }
 
   /**
@@ -94,13 +105,15 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
    * (e.g. DROP TEMPORARY VIEW). Contains system.session only.
    */
   private def tempViewOnlySearchPathForError(): Seq[String] = {
-    Seq(toSQLId(Seq("system", "session")))
+    Seq(toSQLId(Seq(
+      CatalogManager.SYSTEM_CATALOG_NAME,
+      CatalogManager.SESSION_NAMESPACE)))
   }
 
   /**
    * Search path for TABLE_OR_VIEW_NOT_FOUND on unresolved relations in SELECT/DML/INSERT/time
    * travel. Three-part `system.session.name` resolves only to session temp views, so only that
-   * scope is listed. Other names use [[fullSearchPathForError]] (resolutionSearchPath order).
+   * scope is listed. Other names use [[fullSearchPathForError]] (sqlResolutionPathEntries order).
    */
   private def searchPathForUnresolvedRelation(multipartIdentifier: Seq[String]): Seq[String] = {
     if (CatalogManager.isFullyQualifiedSystemSessionViewName(multipartIdentifier)) {
@@ -381,17 +394,15 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
 
       case u: UnresolvedTableOrView =>
         val catalogPath = catalogPathForError
-        val searchPath = if (u.commandName.toUpperCase(Locale.ROOT).contains("TEMPORARY VIEW")) {
-          tempViewOnlySearchPathForError()
-        } else if (u.commandName.toUpperCase(Locale.ROOT).startsWith("DESCRIBE") ||
-            u.commandName.toUpperCase(Locale.ROOT).startsWith("DESC ")) {
-          if (CatalogManager.isFullyQualifiedSystemSessionViewName(u.multipartIdentifier)) {
-            tempViewOnlySearchPathForError()
-          } else {
-            fullSearchPathForError(catalogPath)
-          }
-        } else {
-          ddlSearchPathForError(catalogPath)
+        val searchPath = u.tableNotFoundSearchPathMode match {
+          case UnresolvedTableOrViewSearchPathMode.QueryLike =>
+            if (CatalogManager.isFullyQualifiedSystemSessionViewName(u.multipartIdentifier)) {
+              tempViewOnlySearchPathForError()
+            } else {
+              fullSearchPathForError(catalogPath)
+            }
+          case UnresolvedTableOrViewSearchPathMode.Ddl =>
+            ddlSearchPathForError(catalogPath)
         }
         u.tableNotFound(u.multipartIdentifier, searchPath)
 
@@ -401,8 +412,7 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
           searchPathForUnresolvedRelation(u.multipartIdentifier))
 
       case u: UnresolvedFunctionName =>
-        val searchPath =
-          SQLConf.get.resolutionSearchPath(catalogPathForError).map(_.quoted)
+        val searchPath = fullSearchPathForError(catalogPathForError)
         throw QueryCompilationErrors.unresolvedRoutineError(
           u.multipartIdentifier,
           searchPath,
@@ -449,6 +459,23 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
           messageParameters = Map("name" -> "IDENTIFIER", "expr" -> p.identifierExpr.sql)
         )
 
+      case c: CacheTableAsSelect if c.tempViewName.resolved =>
+        // The parser builds `tempViewName` as either a `Literal[StringType]` (for direct
+        // identifiers and `IDENTIFIER('literal')`) or an `ExpressionWithUnresolvedIdentifier`
+        // that resolves to such a Literal. Validate the post-analysis shape so any future
+        // construction path that violates the invariant fails loudly here, not deep inside
+        // execution via `tempViewNameString`. The `resolved` guard ensures that when the
+        // IDENTIFIER expression itself failed to resolve (e.g. `IDENTIFIER(<unresolved-col>)`),
+        // we fall through to the catch-all `LogicalPlan` case so the user sees the proper
+        // `UNRESOLVED_COLUMN` error rather than an internal error.
+        c.tempViewName match {
+          case Literal(value, _: StringType) if value != null => // OK
+          case other =>
+            throw SparkException.internalError(
+              "CacheTableAsSelect.tempViewName must be a non-null string literal after " +
+                s"analysis, but got: ${other.sql}")
+        }
+
       case operator: LogicalPlan =>
         operator transformExpressionsDown {
           case hof: HigherOrderFunction if hof.arguments.exists {
@@ -595,7 +622,9 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
               searchPathForUnresolvedRelation(u.multipartIdentifier))
 
           case RelationChanges(u: UnresolvedRelation, _) =>
-            u.tableNotFound(u.multipartIdentifier)
+            u.tableNotFound(
+              u.multipartIdentifier,
+              searchPathForUnresolvedRelation(u.multipartIdentifier))
 
           case etw: EventTimeWatermark =>
             etw.eventTime.dataType match {
@@ -645,6 +674,33 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
                 messageParameters = Map.empty)
             }
 
+          // Reject streaming inputs early. The optimizer rewrite is built around an
+          // unconditioned cross-product fed into a global `Aggregate` keyed by a per-row
+          // identifier (`__qid`). That shape doesn't compose cleanly with structured-streaming
+          // semantics: a stateful aggregate keyed by a freshly-generated identifier accumulates
+          // state indefinitely (every batch creates new keys, old keys never match again) and a
+          // cross-product against a streaming right side has no bounded state model today.
+          // Failing at analysis time is clearer than letting either fail at runtime. Streaming
+          // support is tracked as a follow-up; resolving it likely comes from a different
+          // grouping strategy or a dedicated physical operator.
+          case j: NearestByJoin if j.isStreaming =>
+            j.failAnalysis(
+              errorClass = "NEAREST_BY_JOIN.STREAMING_NOT_SUPPORTED",
+              messageParameters = Map.empty)
+
+          case j: NearestByJoin if !conf.crossJoinEnabled =>
+            j.failAnalysis(
+              errorClass = "NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED",
+              messageParameters = Map.empty)
+
+          case j @ NearestByJoin(_, _, _, _, _, rankingExpression, _)
+              if !RowOrdering.isOrderable(rankingExpression.dataType) =>
+            j.failAnalysis(
+              errorClass = "NEAREST_BY_JOIN.NON_ORDERABLE_RANKING_EXPRESSION",
+              messageParameters = Map(
+                "expression" -> toSQLExpr(rankingExpression),
+                "type" -> toSQLType(rankingExpression.dataType)))
+
           case a: Aggregate =>
             a.groupingExpressions.foreach(
               expression =>
@@ -937,6 +993,17 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
               context = j.origin.getQueryContext,
               summary = j.origin.context.summary)
 
+          case j: NearestByJoin if !j.duplicateResolved =>
+            val conflictingAttributes =
+              j.left.outputSet.intersect(j.right.outputSet).map(toSQLExpr(_)).mkString(", ")
+            throw SparkException.internalError(
+              msg = s"""
+                       |Failure when resolving conflicting references in ${j.nodeName}:
+                       |${planToString(plan)}
+                       |Conflicting attributes: $conflictingAttributes.""".stripMargin,
+              context = j.origin.getQueryContext,
+              summary = j.origin.context.summary)
+
           // TODO: although map type is not orderable, technically map type should be able to be
           // used in equality comparison, remove this type check once we support it.
           case o if mapColumnInSetOperation(o).isDefined =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala
index 75619c9c5ce39..8a83b576d7253 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala
@@ -288,6 +288,16 @@ object CollationTypeCoercion extends SQLConfHelper {
             None
         }
 
+      case elementAt: ElementAt =>
+        findCollationContext(elementAt.left) match {
+          case Some(MapType(_, valueType, _)) =>
+            mergeWinner(elementAt.dataType, valueType)
+          case Some(ArrayType(elementType, _)) =>
+            mergeWinner(elementAt.dataType, elementType)
+          case _ =>
+            None
+        }
+
       case struct: CreateNamedStruct =>
         val childrenContexts = struct.valExprs.map(findCollationContext)
         if (childrenContexts.isEmpty) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
index 93d71642ac9fd..55488e8eed955 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
@@ -23,14 +23,17 @@ import scala.collection.mutable
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.SubExprUtils.wrapOuterReference
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
 import org.apache.spark.sql.catalyst.trees.TreePattern._
+import org.apache.spark.sql.catalyst.util.toPrettySQL
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors}
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.NullType
 
 trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
 
@@ -147,8 +150,20 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
           // When strict DataFrame column resolution is disabled, we also allow name-based
           // resolution as a fallback for tagged attributes.
           val result = withPosition(u) {
-            resolveColumnByName(nameParts)
-              .orElse(LiteralFunctionResolution.resolve(nameParts))
+            // A parameterless built-in function takes precedence over a SQL UDF parameter
+            // that happens to share its name (per the documented SQL name resolution rules).
+            // Real columns from relations -- which don't carry the
+            // SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY -- continue to win as before.
+            // Gated by a legacy kill-switch conf so the pre-fix behavior can be restored.
+            val column = resolveColumnByName(nameParts)
+            val resolved = column match {
+              case Some(c) if isSQLFunctionParameterAlias(c) && !conf.getConf(
+                  SQLConf.LEGACY_ALLOW_UDF_PARAMETER_TO_SHADOW_PARAMETERLESS_FUNCTION) =>
+                LiteralFunctionResolution.resolve(nameParts).orElse(column)
+              case Some(_) => column
+              case None => LiteralFunctionResolution.resolve(nameParts)
+            }
+            resolved
               .map {
                 // We trim unnecessary alias here. Note that, we cannot trim the alias at top-level,
                 // as we should resolve `UnresolvedAttribute` to a named expression. The caller side
@@ -180,7 +195,19 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
             field
           }
           if (newChild.resolved) {
-            ExtractValue(child = newChild, extraction = resolvedField, resolver = resolver)
+            // applyOrNull propagates NULL when the base is NullType instead of throwing
+            // INVALID_EXTRACT_BASE_FIELD_TYPE, consistent with multipart field access (col.a).
+            val extracted = ExtractValue.applyOrNull(
+              child = newChild, extraction = resolvedField, resolver = resolver)
+            // A NullType base yields a bare NULL literal, which would otherwise produce an output
+            // column named `NULL`. Alias it with the extraction's text (e.g. `col[0]`) to keep a
+            // stable column name; CleanupAliases later trims this alias where it's not a top-level
+            // projection output.
+            if (newChild.dataType == NullType) {
+              Alias(extracted, toPrettySQL(u.copy(child = newChild, extraction = resolvedField)))()
+            } else {
+              extracted
+            }
           } else {
             u.copy(child = newChild, extraction = resolvedField)
           }
@@ -258,7 +285,8 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
 
   // Resolves `UnresolvedAttribute` to its value.
   protected def resolveVariables(e: Expression): Expression = {
-    val variableResolution = new VariableResolution(catalogManager.tempVariableManager)
+    val variableResolution =
+      new VariableResolution(catalogManager.tempVariableManager, catalogManager)
 
     def resolve(nameParts: Seq[String]): Option[Expression] = {
       variableResolution.resolveMultipartName(
@@ -504,7 +532,10 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
   //    5, resolve the expression against the target node, the resolved attribute will be
   //       filtered by the output attributes of nodes in the path (from matching to root node);
   //    6. if more than one resolved attributes are found in the above recursive process,
-  //       fails with 'AMBIGUOUS_COLUMN_REFERENCE'.
+  //       disambiguate by preferring regular candidates (visible via `output`) over
+  //       hidden ones (only via `metadataOutput`), then preferring depth-0 (direct)
+  //       matches over deeper ones; fails with 'AMBIGUOUS_COLUMN_REFERENCE' if neither
+  //       tiebreaker yields a single winner.
   //    7. if all the resolved attributes are filtered out, return the original expression
   //       as it is.
   private def tryResolveDataFrameColumns(
@@ -529,7 +560,7 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
 
     val isMetadataAccess = u.containsTag(LogicalPlan.IS_METADATA_COL)
 
-    val (resolved, matched) = resolveDataFrameColumnByPlanId(
+    val (candidates, matched) = resolveDataFrameColumnByPlanId(
       u, planId, isMetadataAccess, q, 0)
     if (!matched) {
       // Can not find the target plan node with plan id, e.g.
@@ -538,27 +569,41 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
       //  df1.select(df2.a)   <-   illegal reference df2.a
       throw QueryCompilationErrors.cannotResolveDataFrameColumn(u)
     }
-    resolved.map(_._1)
+
+    // If there is at least one regular (`hidden = false`) candidate, run the
+    // merge over regular candidates only and ignore hidden ones (e.g. a
+    // natural/USING join hidden key). Otherwise run the merge over hidden
+    // candidates.
+    val (regular, hidden) = candidates.partition(!_.hidden)
+    val finalists = if (regular.nonEmpty) regular else hidden
+    finalists.sortBy(_.depth).foldLeft(Option.empty[Candidate]) {
+      case (None, c) => Some(c)
+      // If the current winner is a direct match (depth 0) and a further
+      // candidate is nested deeper, prefer the direct one.
+      case (Some(c1), c2) if c1.depth == 0 && c2.depth != 0 => Some(c1)
+      case _ => throw QueryCompilationErrors.ambiguousColumnReferences(u)
+    }.map(_.expr)
   }
 
+  // Candidate threaded through the plan walk:
+  //   - expr:   the resolved expression
+  //   - depth:  the depth at which it was matched
+  //   - hidden: whether the candidate's references live in `p.metadataOutput`
+  //             at some plan node along the way (e.g. a natural/USING join
+  //             wrapper that hides a join key via `Project.hiddenOutputTag`).
+  private case class Candidate(expr: NamedExpression, depth: Int, hidden: Boolean)
+
   private def resolveDataFrameColumnByPlanId(
       u: UnresolvedAttribute,
       id: Long,
       isMetadataAccess: Boolean,
       q: Seq[LogicalPlan],
-      currentDepth: Int): (Option[(NamedExpression, Int)], Boolean) = {
+      currentDepth: Int): (Seq[Candidate], Boolean) = {
     val resolved = q.map(resolveDataFrameColumnRecursively(
       u, id, isMetadataAccess, _, currentDepth))
-    val merged = resolved
-      .flatMap(_._1)
-      .sortBy(_._2) // sort by depth
-      .foldLeft(Option.empty[(NamedExpression, Int)]) {
-        case (None, (r2, d2)) => Some((r2, d2))
-        case (Some((r1, 0)), (r2, d2)) if d2 != 0 => Some((r1, 0))
-        case _ => throw QueryCompilationErrors.ambiguousColumnReferences(u)
-      }
+    val candidates = resolved.flatMap(_._1)
     val matched = resolved.exists(_._2)
-    (merged, matched)
+    (candidates, matched)
   }
 
   private def resolveDataFrameColumnRecursively(
@@ -566,8 +611,8 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
       id: Long,
       isMetadataAccess: Boolean,
       p: LogicalPlan,
-      currentDepth: Int): (Option[(NamedExpression, Int)], Boolean) = {
-    val (resolved, matched) = if (p.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) {
+      currentDepth: Int): (Seq[Candidate], Boolean) = {
+    val (candidates, matched) = if (p.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) {
       val resolved = if (!isMetadataAccess) {
         p.resolve(u.nameParts, conf.resolver)
       } else if (u.nameParts.size == 1) {
@@ -575,10 +620,12 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
       } else {
         None
       }
-      // The targe plan node is found, but might still fail to resolve.
-      // In this case, return None to delay the failure, so it is possible to be
+      // The target plan node is found, but might still fail to resolve. In this
+      // case, return an empty Seq to delay the failure, so it is possible to be
       // resolved in the next iteration.
-      (resolved.map(r => (r, currentDepth)), true)
+      // Always initialize `hidden = false` here; the filter below will set it
+      // correctly based on whether the references live in `p.metadataOutput`.
+      (resolved.map(Candidate(_, currentDepth, hidden = false)).toSeq, true)
     } else {
       val children = p match {
         // treat Union node as the leaf node
@@ -616,12 +663,22 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
     // df = spark.range(10).withColumn("v", sf.col("id") + 1)
     // df.select(df.v).sort(df.id)
     //
-    // In this case, resolveDataFrameColumnByPlanId returns None,
+    // In this case, resolveDataFrameColumnByPlanId returns an empty Seq,
     // the dataframe column 'df.id' will remain unresolved, and the analyzer
     // will try to resolve 'id' without plan id later.
-    val filtered = resolved.filter { r =>
-      // A DataFrame column can be resolved as a metadata column, we should keep it.
-      r._1.references.subsetOf(AttributeSet(p.output ++ p.metadataOutput))
+    //
+    // A DataFrame column can also be resolved as a metadata column at some
+    // ancestor plan (e.g. a natural/USING join wrapper that hides a join key
+    // via `Project.hiddenOutputTag`). We accept that here but tag the candidate
+    // as `hidden` so the top-level merge in `resolveDataFrameColumn` can prefer
+    // a regular (p.output) match over hidden (p.metadataOutput) ones.
+    val filtered = candidates.flatMap { c =>
+      val hidden = c.hidden || c.expr.references.subsetOf(AttributeSet(p.metadataOutput))
+      if (c.expr.references.subsetOf(AttributeSet(p.output ++ p.metadataOutput))) {
+        Some(c.copy(hidden = hidden))
+      } else {
+        None
+      }
     }
     (filtered, matched)
   }
@@ -666,4 +723,18 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
        r.expressions.forall(_.references.subsetOf(outputSet))
      }
    }
+
+  /**
+   * True if `e` originates from a SQL UDF input parameter alias, as marked by
+   * `SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY` at parameter-alias
+   * construction sites. Unwraps `OuterReference` so callers that pass post-outer-resolution
+   * expressions still get a correct answer; the metadata lives on the underlying named
+   * expression.
+   */
+  private def isSQLFunctionParameterAlias(e: Expression): Boolean = e match {
+    case OuterReference(inner) => isSQLFunctionParameterAlias(inner)
+    case n: NamedExpression =>
+      n.metadata.contains(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY)
+    case _ => false
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala
index 2a2440117e401..ec2ba4f692216 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala
@@ -36,7 +36,8 @@ object DeduplicateRelations extends Rule[LogicalPlan] {
     def noMissingInput(p: LogicalPlan) = !p.exists(_.missingInput.nonEmpty)
 
     newPlan.resolveOperatorsUpWithPruning(
-      _.containsAnyPattern(JOIN, LATERAL_JOIN, AS_OF_JOIN, INTERSECT, EXCEPT, UNION, COMMAND),
+      _.containsAnyPattern(
+        JOIN, LATERAL_JOIN, AS_OF_JOIN, NEAREST_BY_JOIN, INTERSECT, EXCEPT, UNION, COMMAND),
       ruleId) {
       case p: LogicalPlan if !p.childrenResolved => p
       // To resolve duplicate expression IDs for Join.
@@ -50,6 +51,10 @@ object DeduplicateRelations extends Rule[LogicalPlan] {
       case j @ AsOfJoin(left, right, _, _, _, _, _)
           if !j.duplicateResolved && noMissingInput(right) =>
         j.copy(right = dedupRight(left, right))
+      // Resolve duplicate output for NearestByJoin.
+      case j @ NearestByJoin(left, right, _, _, _, _, _)
+          if !j.duplicateResolved && noMissingInput(right) =>
+        j.copy(right = dedupRight(left, right))
       // intersect/except will be rewritten to join at the beginning of optimizer. Here we need to
       // deduplicate the right side plan, so that we won't produce an invalid self-join later.
       case i @ Intersect(left, right, _) if !i.duplicateResolved && noMissingInput(right) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index ef7b25208928c..34b9559c58511 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -777,6 +777,7 @@ object FunctionRegistry {
     expression[UnixMillis]("unix_millis"),
     expression[UnixMicros]("unix_micros"),
     expression[ConvertTimezone]("convert_timezone"),
+    expressionBuilder("time_bucket", TimeBucketExpressionBuilder),
 
     // collection functions
     expression[CreateArray]("array"),
@@ -978,6 +979,7 @@ object FunctionRegistry {
     expression[SchemaOfVariant]("schema_of_variant"),
     expression[SchemaOfVariantAgg]("schema_of_variant_agg"),
     expression[ToVariantObject]("to_variant_object"),
+    expression[IsValidVariant]("is_valid_variant"),
 
     // Spatial
     expression[ST_AsBinary]("st_asbinary"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala
index 4f3428cc69739..4f6aee03967cb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala
@@ -19,9 +19,12 @@ package org.apache.spark.sql.catalyst.analysis
 
 import java.util.concurrent.atomic.AtomicBoolean
 
+import scala.util.control.NonFatal
+
+import org.apache.spark.SparkThrowable
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.FunctionIdentifier
+import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
@@ -32,7 +35,8 @@ import org.apache.spark.sql.connector.catalog.{
   CatalogPlugin,
   CatalogV2Util,
   Identifier,
-  LookupCatalog
+  LookupCatalog,
+  ProcedureCatalog
 }
 
 /**
@@ -56,50 +60,68 @@ import org.apache.spark.sql.connector.catalog.functions.{
   UnboundFunction
 }
 import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors}
-import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.connector.V1Function
 import org.apache.spark.sql.types._
 
 class FunctionResolution(
     override val catalogManager: CatalogManager,
     relationResolution: RelationResolution)
-    extends DataTypeErrorsBase with LookupCatalog with Logging {
+    extends DataTypeErrorsBase with LookupCatalog with SQLConfHelper with Logging {
   private val v1SessionCatalog = catalogManager.v1SessionCatalog
 
   private val trimWarningEnabled = new AtomicBoolean(true)
 
-  /** Returns the current catalog path, preferring the view's context if resolving a view. */
-  private def currentCatalogPath: Seq[String] = {
-    val ctx = AnalysisContext.get.catalogAndNamespace
-    if (ctx.nonEmpty) ctx
-    else (Seq(catalogManager.currentCatalog.name) ++ catalogManager.currentNamespace).toSeq
-  }
-
   /** True if nameParts is 3-part and the first part is the system catalog name. */
   private def isSystemCatalogQualified(nameParts: Seq[String]): Boolean =
     nameParts.length == 3 &&
       nameParts.head.equalsIgnoreCase(CatalogManager.SYSTEM_CATALOG_NAME)
 
+  /**
+   * True iff `system.session` is searched before `system.builtin` in the effective SQL PATH.
+   *
+   * Drives the `count(*) -> count(1)` rewrite (which must skip transformation when a temp
+   * `count` shadows the builtin) and the `SessionCatalog` security check that blocks creating
+   * a temp function with a builtin's name. Reads the live PATH via `CatalogManager` and
+   * applies the same kinds extraction that drives `SessionCatalog`'s fast-path provider, so
+   * the predicate stays in sync with the lookup loop's actual order. Uses the consolidated
+   * snapshot helper (SPARK-56939) so the (catalog, namespace, path) triple is observed
+   * atomically.
+   */
+  def isSessionBeforeBuiltinInPath: Boolean = {
+    catalogManager.sessionFunctionKindsForUnqualifiedResolution().headOption
+      .contains(org.apache.spark.sql.catalyst.catalog.SessionCatalog.Temp)
+  }
+
   /**
    * Produces the ordered list of candidate names for resolution. Expansion happens in two cases:
    *
-   * 1. Single-part names: expanded via the search path, where each search path entry is
-   *    fully qualified so appending the name produces fully qualified candidates.
+   * 1. Single-part names: expanded via [[CatalogManager.sqlResolutionPathEntries]] (same list as
+   *    relation resolution), where each path entry is fully qualified so appending the name
+   *    produces fully qualified candidates.
    * 2. `builtin.name` or `session.name`: prepending `system` creates a fully qualified
    *    system catalog candidate. The original 2-part name is also kept as a persistent
    *    catalog candidate (qualified downstream). Order is controlled by
    *    the `persistentCatalogFirst` config.
    *
    * All other multi-part names are returned as-is for downstream resolution.
+   *
+   * When [[AnalysisContext.resolutionPathEntries]] is set (view or SQL function / table function
+   * body with a pinned path, with [[SQLConf.PATH_ENABLED]] true), that frozen list is used
+   * directly, matching [[RelationResolution.relationResolutionEntries]] so routine order stays
+   * aligned with relation order.
    */
+  private[analysis] def sqlResolutionPathEntriesForAnalysis: Seq[Seq[String]] =
+    catalogManager.resolutionPathEntriesForAnalysis(
+      AnalysisContext.get.resolutionPathEntries,
+      AnalysisContext.get.catalogAndNamespace)
+
   private def resolutionCandidates(nameParts: Seq[String]): Seq[Seq[String]] = {
     if (nameParts.size == 1) {
-      val searchPath = SQLConf.get.resolutionSearchPath(currentCatalogPath)
-      searchPath.map(_ ++ nameParts)
+      sqlResolutionPathEntriesForAnalysis.map(_ ++ nameParts)
     } else if (nameParts.size == 2 &&
         FunctionResolution.sessionNamespaceKind(nameParts).isDefined) {
       val systemCandidate = CatalogManager.SYSTEM_CATALOG_NAME +: nameParts
-      if (SQLConf.get.prioritizeSystemCatalog) {
+      if (conf.prioritizeSystemCatalog) {
         Seq(systemCandidate, nameParts)
       } else {
         Seq(nameParts, systemCandidate)
@@ -174,9 +196,10 @@ class FunctionResolution(
           case None =>
         }
       }
-      val searchPath = SQLConf.get.resolutionSearchPath(currentCatalogPath)
       throw QueryCompilationErrors.unresolvedRoutineError(
-        unresolvedFunc.nameParts, searchPath.map(toSQLId), unresolvedFunc.origin)
+        unresolvedFunc.nameParts,
+        sqlResolutionPathEntriesForAnalysis.map(toSQLId),
+        unresolvedFunc.origin)
     }
   }
 
@@ -345,6 +368,43 @@ class FunctionResolution(
     }
 
     // Check external catalog for persistent functions
+    if (nameParts.length == 1) {
+      // Must match [[resolutionCandidates]] / [[resolveFunction]]: single-part names use PATH +
+      // session order, not only the current namespace (LookupCatalog single-part rule).
+      // `system.session.<name>` and `system.builtin.<name>` candidates were already resolved by
+      // [[lookupBuiltinOrTempFunction]] / [[lookupBuiltinOrTempTableFunction]] above (they
+      // route through `identifierFromSystemNameParts`, which only accepts those two
+      // namespaces); skip them here to avoid redundant catalog calls. Other `system.<x>`
+      // namespaces -- if any are ever added -- still go through persistent lookup.
+      val persistentCandidates = resolutionCandidates(nameParts).filterNot { c =>
+        c.length >= 2 &&
+          c.head.equalsIgnoreCase(CatalogManager.SYSTEM_CATALOG_NAME) && {
+            val ns = c(1)
+            ns.equalsIgnoreCase(CatalogManager.SESSION_NAMESPACE) ||
+              ns.equalsIgnoreCase(CatalogManager.BUILTIN_NAMESPACE)
+          }
+      }
+      for (candidate <- persistentCandidates) {
+        try {
+          candidate match {
+            case CatalogAndIdentifier(catalog, ident) =>
+              if (catalog.asFunctionCatalog.functionExists(ident)) {
+                return FunctionType.Persistent
+              }
+            case _ =>
+          }
+        } catch {
+          // Only treat explicit "not found" / "forbidden" signals as a miss. Any other failure
+          // (e.g. permission denied, transient catalog error) propagates.
+          case _: NoSuchFunctionException
+             | _: NoSuchNamespaceException
+             | _: CatalogNotFoundException =>
+          case e: AnalysisException if e.getCondition == "FORBIDDEN_OPERATION" =>
+        }
+      }
+      return FunctionType.NotFound
+    }
+
     val CatalogAndIdentifier(catalog, ident) = relationResolution.expandIdentifier(nameParts)
     if (catalog.asFunctionCatalog.functionExists(ident)) {
       return FunctionType.Persistent
@@ -592,6 +652,66 @@ class FunctionResolution(
       errorClass = errorClass,
       messageParameters = messageParameters)
   }
+
+  /**
+   * Resolves [[UnresolvedProcedure]] for `CALL` / `DESCRIBE PROCEDURE` using the same multipart
+   * candidates as SQL functions and relations ([[resolutionCandidates]] /
+   * [[sqlResolutionPathEntriesForAnalysis]]). Catalogs that do not implement
+   * [[ProcedureCatalog]] are skipped for unqualified names; an explicitly catalog-qualified name
+   * that targets a non-[[ProcedureCatalog]] still raises
+   * [[QueryCompilationErrors.missingCatalogProceduresAbilityError]].
+   */
+  def resolveProcedure(unresolved: UnresolvedProcedure): LogicalPlan = {
+    val candidates = resolutionCandidates(unresolved.nameParts)
+    val skipCandidateFailures = unresolved.nameParts.length == 1
+    for (multipart <- candidates) {
+      val expandedOpt =
+        try {
+          Some(relationResolution.expandIdentifier(multipart))
+        } catch {
+          case NonFatal(_) => None
+        }
+      expandedOpt.foreach { expanded =>
+        CatalogAndIdentifier.unapply(expanded).foreach { case (catalog, ident) =>
+          catalog match {
+            case pc: ProcedureCatalog =>
+              try {
+                val procedure = pc.loadProcedure(ident)
+                return ResolvedProcedure(pc, ident, procedure)
+              } catch {
+                // ProcedureCatalog has no standard "not found" exception type today. For
+                // unqualified names searched through PATH, treat candidate failures as misses and
+                // continue to the next entry (matching table/function PATH iteration semantics).
+                // Explicitly catalog-qualified names still preserve existing error behavior.
+                case _: AnalysisException if skipCandidateFailures =>
+                case _: SparkThrowable if skipCandidateFailures =>
+                case NonFatal(_) if skipCandidateFailures =>
+                case e: AnalysisException => throw e
+                case e: SparkThrowable => throw e
+                case NonFatal(e) =>
+                  val cause = e match {
+                    case ex: Exception => ex
+                    case th => new RuntimeException(th)
+                  }
+                  throw QueryCompilationErrors.failedToLoadRoutineError(
+                    catalog.name +: ident.asMultipartIdentifier,
+                    cause)
+              }
+            case _ =>
+              if (unresolved.nameParts.length > 1 &&
+                  catalogManager.isCatalogRegistered(unresolved.nameParts.head) &&
+                  catalog.name().equalsIgnoreCase(unresolved.nameParts.head)) {
+                throw QueryCompilationErrors.missingCatalogProceduresAbilityError(catalog)
+              }
+          }
+        }
+      }
+    }
+    throw QueryCompilationErrors.unresolvedRoutineError(
+      unresolved.nameParts,
+      sqlResolutionPathEntriesForAnalysis.map(toSQLId),
+      unresolved.origin)
+  }
 }
 
 /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationChanges.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationChanges.scala
index 2b4ba58d1745c..84f82ffc1f2ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationChanges.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationChanges.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
 
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.trees.TreePattern.{RELATION_CHANGES, TreePattern}
-import org.apache.spark.sql.connector.catalog.ChangelogInfo
+import org.apache.spark.sql.connector.catalog.ChangelogContext
 
 /**
  * A logical node used to query Change Data Capture (CDC) changes for a table relation.
@@ -33,10 +33,10 @@ import org.apache.spark.sql.connector.catalog.ChangelogInfo
  * [[UnresolvedLeafNode]]). Tree traversals like `transformUp` will not visit `relation`.
  *
  * @param relation the table relation (typically an [[UnresolvedRelation]])
- * @param changelogInfo the CDC query parameters (range, deduplication mode, etc.)
+ * @param changelogContext the CDC query context (range, deduplication mode, etc.)
  */
 case class RelationChanges(
     relation: LogicalPlan,
-    changelogInfo: ChangelogInfo) extends UnresolvedLeafNode {
+    changelogContext: ChangelogContext) extends UnresolvedLeafNode {
   override val nodePatterns: Seq[TreePattern] = Seq(RELATION_CHANGES)
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala
index e86248febd2eb..55a7ad10790ea 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala
@@ -23,7 +23,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.SQLConfHelper
 import org.apache.spark.sql.catalyst.catalog.{
-  CatalogTableType,
+  CatalogTable,
   TemporaryViewRelation,
   UnresolvedCatalogRelation
 }
@@ -33,12 +33,17 @@ import org.apache.spark.sql.connector.catalog.{
   CatalogManager,
   CatalogPlugin,
   CatalogV2Util,
-  ChangelogInfo,
+  ChangelogContext,
   Identifier,
   LookupCatalog,
+  MetadataTable,
   Table,
+  TableCatalog,
+  TableViewCatalog,
   V1Table,
-  V2TableWithV1Fallback
+  V2TableWithV1Fallback,
+  ViewCatalog,
+  ViewInfo
 }
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors}
@@ -110,34 +115,45 @@ class RelationResolution(
 
   /**
    * Scope in the relation resolution search path. Used to interpret
-   * [[SQLConf.resolutionSearchPath]] when resolving unqualified table/view names.
+   * [[CatalogManager.sqlResolutionPathEntries]] when resolving unqualified table/view names.
    */
-  private sealed trait RelationResolutionScope
-  private case object SessionScope extends RelationResolutionScope
-  private case object PersistentScope extends RelationResolutionScope
+  private sealed trait RelationResolutionStep
+  private case object SessionScopeStep extends RelationResolutionStep
+  private case class PersistentCatalogStep(catalogAndNamespace: Seq[String])
+      extends RelationResolutionStep
 
   /**
-   * Returns the relation resolution search path for unqualified (1-part) names.
-   * Uses the single search path for all objects: [[SQLConf.resolutionSearchPath]].
-   * Maps path entries: system.session -> SessionScope, system.builtin -> skip (no views),
-   * other (catalog path) -> PersistentScope.
+   * Path entries for unqualified relation resolution.
+   *
+   * Inside a view or SQL function, [[AnalysisContext.resolutionPathEntries]] uses the
+   * persisted frozen path from metadata when available.
+   * When PATH is disabled, legacy resolution rules apply.
    */
-  private def relationResolutionSearchPath: Seq[RelationResolutionScope] = {
-    val catalogPath = (currentCatalog.name +: catalogManager.currentNamespace).toSeq
-    conf.resolutionSearchPath(catalogPath).flatMap {
-      case Seq("system", "session") => Some(SessionScope)
+  private def relationResolutionEntries: Seq[Seq[String]] = {
+    catalogManager.resolutionPathEntriesForAnalysis(
+      AnalysisContext.get.resolutionPathEntries,
+      AnalysisContext.get.catalogAndNamespace)
+  }
+
+  /**
+   * Ordered resolution steps for unqualified relation names. Each persistent path entry is kept
+   * with its catalog/namespace so lookup qualifies the object name under that entry (not only
+   * under the session's current namespace).
+   */
+  private def relationResolutionSteps: Seq[RelationResolutionStep] = {
+    relationResolutionEntries.flatMap {
+      case p if CatalogManager.isSystemSessionPathEntry(p) => Some(SessionScopeStep)
       case Seq("system", "builtin") => None
-      case _ => Some(PersistentScope)
+      case entry => Some(PersistentCatalogStep(entry))
     }
   }
 
   /**
    * Resolution search path formatted for TABLE_OR_VIEW_NOT_FOUND error messages.
-   * Same order as relationResolutionSearchPath; each entry is quoted (e.g. "`system`.`session`").
+   * Same order as [[relationResolutionSteps]]; each entry is quoted (e.g. "`system`.`session`").
    */
   def resolutionSearchPathForError: Seq[String] = {
-    val catalogPath = (currentCatalog.name +: catalogManager.currentNamespace).toSeq
-    conf.resolutionSearchPath(catalogPath).map(toSQLId)
+    relationResolutionEntries.map(toSQLId)
   }
 
   /**
@@ -195,15 +211,15 @@ class RelationResolution(
       ).orElse(tryResolvePersistent(u, identifier, finalTimeTravelSpec))
     }
 
-    // 1-part name: try each scope in relationResolutionSearchPath order (from
-    // [[SQLConf.resolutionSearchPath]]).
-    val candidates = relationResolutionSearchPath
-    for (scope <- candidates) {
-      val result = scope match {
-        case SessionScope =>
+    // 1-part name: try each step in [[relationResolutionSteps]] order (from
+    // [[CatalogManager.sqlResolutionPathEntries]]).
+    val steps = relationResolutionSteps
+    for (step <- steps) {
+      val result = step match {
+        case SessionScopeStep =>
           resolveTempView(identifier, u.isStreaming, finalTimeTravelSpec.isDefined)
-        case PersistentScope =>
-          tryResolvePersistent(u, identifier, finalTimeTravelSpec)
+        case PersistentCatalogStep(prefix) =>
+          tryResolvePersistent(u, prefix ++ identifier, finalTimeTravelSpec)
       }
       if (result.isDefined) return result
     }
@@ -227,11 +243,60 @@ class RelationResolution(
           .orElse {
             val writePrivileges = u.options.get(UnresolvedRelation.REQUIRED_WRITE_PRIVILEGES)
             val finalOptions = u.clearWritePrivileges.options
-            val table = CatalogV2Util.loadTable(
-              catalog,
-              ident,
-              finalTimeTravelSpec,
-              Option(writePrivileges))
+            // For a `TableViewCatalog` with no time-travel / write privileges, the single-RPC
+            // `loadTableOrView` answers both "is there a table?" and "is there a view?" in one
+            // call. Time-travel and write privileges apply to tables only, so for those the
+            // lookup falls through to the table-only `loadTable` path below; views are not
+            // reachable via the v2 fallback in those cases.
+            //
+            // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog`
+            // mixin): `CatalogV2Util.loadTable` would call `asTableCatalog` and throw
+            // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path.
+            val tableOrView: Option[Table] = catalog match {
+              case mc: TableViewCatalog if finalTimeTravelSpec.isEmpty && writePrivileges == null =>
+                try {
+                  Some(mc.loadTableOrView(ident))
+                } catch {
+                  case _: NoSuchTableException => None
+                }
+              case _ =>
+                val tableSide: Option[Table] = if (
+                  CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog]
+                ) {
+                  CatalogV2Util.loadTable(
+                    catalog,
+                    ident,
+                    finalTimeTravelSpec,
+                    Option(writePrivileges))
+                } else {
+                  None
+                }
+                // Fallback to ViewCatalog for catalogs that host views but where loadTable
+                // returned None (or was skipped because there's no TableCatalog mixin).
+                // Time-travel / write privileges only apply to tables, not views, so the
+                // fallback only fires when both are absent.
+                tableSide.orElse {
+                  if (finalTimeTravelSpec.isEmpty && writePrivileges == null) {
+                    catalog match {
+                      case vc: ViewCatalog =>
+                        try {
+                          Some(new MetadataTable(vc.loadView(ident), ident.toString))
+                        } catch {
+                          case _: NoSuchViewException => None
+                        }
+                      case _ => None
+                    }
+                  } else {
+                    None
+                  }
+                }
+            }
+            // `table` is `tableOrView` filtered to tables only -- used for cache lookup since
+            // we don't share-cache views.
+            val table: Option[Table] = tableOrView.filter {
+              case t: MetadataTable if t.getTableInfo.isInstanceOf[ViewInfo] => false
+              case _ => true
+            }
 
             val sharedRelationCacheMatch = for {
               t <- table
@@ -249,7 +314,7 @@ class RelationResolution(
               val loaded = createRelation(
                 catalog,
                 ident,
-                table,
+                tableOrView,
                 finalOptions,
                 u.isStreaming,
                 finalTimeTravelSpec)
@@ -265,19 +330,17 @@ class RelationResolution(
    * Resolve a CDC (CHANGES) query: look up the catalog, call loadChangelog(), wrap in
    * ChangelogTable, and return a DataSourceV2Relation.
    */
-  def resolveChangelog(
-      u: UnresolvedRelation,
-      changelogInfo: ChangelogInfo): Option[LogicalPlan] = {
+  def resolveChangelog(u: UnresolvedRelation, ctx: ChangelogContext): Option[LogicalPlan] = {
     expandIdentifier(u.multipartIdentifier) match {
       case CatalogAndIdentifier(catalog, ident) =>
         val tableCatalog = catalog.asTableCatalog
         val changelog = try {
-          tableCatalog.loadChangelog(ident, changelogInfo)
+          tableCatalog.loadChangelog(ident, ctx, u.options)
         } catch {
           case _: UnsupportedOperationException =>
             throw QueryCompilationErrors.cdcNotSupportedError(tableCatalog.name())
         }
-        val changelogTable = ChangelogTable(changelog, changelogInfo)
+        val changelogTable = ChangelogTable(changelog, ctx)
         val relation = if (u.isStreaming) {
           StreamingRelationV2(
             None, changelogTable.name, changelogTable, u.options,
@@ -314,6 +377,22 @@ class RelationResolution(
       options: CaseInsensitiveStringMap,
       isStreaming: Boolean,
       timeTravelSpec: Option[TimeTravelSpec]): Option[LogicalPlan] = {
+    def createDataSourceV1Scan(v1Table: CatalogTable): LogicalPlan = {
+      if (isStreaming) {
+        if (v1Table.isViewLike) {
+          throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError(
+            ident.quoted
+          )
+        }
+        SubqueryAlias(
+          v1Table.fullIdent,
+          UnresolvedCatalogRelation(v1Table, options, isStreaming = true)
+        )
+      } else {
+        v1SessionCatalog.getRelation(v1Table, options)
+      }
+    }
+
     table.map {
       // To utilize this code path to execute V1 commands, e.g. INSERT,
       // either it must be session catalog, or tracksPartitionsInCatalog
@@ -324,19 +403,13 @@ class RelationResolution(
       case v1Table: V1Table
           if CatalogV2Util.isSessionCatalog(catalog)
           || !v1Table.catalogTable.tracksPartitionsInCatalog =>
-        if (isStreaming) {
-          if (v1Table.v1Table.tableType == CatalogTableType.VIEW) {
-            throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError(
-              ident.quoted
-            )
-          }
-          SubqueryAlias(
-            catalog.name +: ident.asMultipartIdentifier,
-            UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true)
-          )
-        } else {
-          v1SessionCatalog.getRelation(v1Table.v1Table, options)
-        }
+        createDataSourceV1Scan(v1Table.v1Table)
+
+      // MetadataTable is a sentinel meaning "interpret via v1", so unlike the V1Table
+      // case above we apply no session-catalog / tracksPartitionsInCatalog guard -- any catalog
+      // returning MetadataTable has opted into v1 read semantics.
+      case t: MetadataTable =>
+        createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t))
 
       case table =>
         if (isStreaming) {
@@ -385,7 +458,11 @@ class RelationResolution(
   }
 
   def resolveReference(ref: V2TableReference): LogicalPlan = {
-    val relation = getOrLoadRelation(ref)
+    val relation = if (ref.context.cacheable) {
+      getOrLoadRelation(ref)
+    } else {
+      loadRelation(ref)
+    }
     val planId = ref.getTagValue(LogicalPlan.PLAN_ID_TAG)
     cloneWithPlanId(relation, planId)
   }
@@ -402,10 +479,25 @@ class RelationResolution(
     }
   }
 
+  /**
+   * Loads the table for a [[V2TableReference]] and returns a resolved [[DataSourceV2Relation]].
+   *
+   * The catalog is re-resolved by name through the [[CatalogManager]] rather than reusing
+   * [[V2TableReference#catalog]] directly. When a transaction is active, the
+   * [[TransactionAwareCatalogManager]] redirects catalog lookups to the transaction's catalog
+   * instance, so the [[TableCatalog#loadTable]] call is intercepted by the transaction catalog,
+   * which uses it to track which tables are read as part of the transaction.
+   */
   private def loadRelation(ref: V2TableReference): LogicalPlan = {
-    val table = ref.catalog.loadTable(ref.identifier)
+    val resolvedCatalog = catalogManager.catalog(ref.catalog.name).asTableCatalog
+    val table = resolvedCatalog.loadTable(ref.identifier)
     V2TableReferenceUtils.validateLoadedTable(table, ref)
-    ref.toRelation(table)
+    DataSourceV2Relation(
+      table = table,
+      output = ref.output,
+      catalog = Some(resolvedCatalog),
+      identifier = Some(ref.identifier),
+      options = ref.options)
   }
 
   private def adaptCachedRelation(cached: LogicalPlan, ref: V2TableReference): LogicalPlan = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala
index f34c6be9954e9..185a5503b1107 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala
@@ -44,7 +44,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
     case c @ CreateVariable(identifiers, _, _) =>
       // We resolve only UnresolvedIdentifiers, and pass on the other nodes
       val resolved = identifiers.map {
-        case UnresolvedIdentifier(nameParts, _) =>
+        case u @ UnresolvedIdentifier(nameParts, _) =>
           if (withinLocalVariableScope) {
             if (c.replace) {
               throw new AnalysisException(
@@ -67,20 +67,22 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
             val resolvedIdentifier
             = catalogManager.tempVariableManager.qualify(nameParts.last)
 
-            assertValidSessionVariableNameParts(nameParts, resolvedIdentifier)
+            assertValidSessionVariableNameParts(nameParts, resolvedIdentifier, u.origin)
             resolvedIdentifier
           }
         case plan => plan
       }
       c.copy(names = resolved)
 
-    case d @ DropVariable(UnresolvedIdentifier(nameParts, _), _) =>
+    case d @ DropVariable(u @ UnresolvedIdentifier(nameParts, _), _) =>
       if (withinLocalVariableScope) {
         throw new AnalysisException(
           "UNSUPPORTED_FEATURE.SQL_SCRIPTING_DROP_TEMPORARY_VARIABLE", Map.empty)
       }
+      // DDL on session variables targets `system.session` directly; the SQL path only applies
+      // to DML (see [[VariableResolution.allowUnqualifiedSessionTempVariableLookup]]).
       val resolved = catalogManager.tempVariableManager.qualify(nameParts.last)
-      assertValidSessionVariableNameParts(nameParts, resolved)
+      assertValidSessionVariableNameParts(nameParts, resolved, u.origin)
       d.copy(name = resolved)
 
     case CreateFunction(UnresolvedIdentifier(nameParts, _), _, _, _, _)
@@ -215,13 +217,15 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
 
   private def assertValidSessionVariableNameParts(
       nameParts: Seq[String],
-      resolvedIdentifier: ResolvedIdentifier): Unit = {
+      resolvedIdentifier: ResolvedIdentifier,
+      origin: Origin): Unit = {
     if (!validSessionVariableName(nameParts)) {
       throw QueryCompilationErrors.unresolvedVariableError(
         nameParts,
-        Seq(
+        Seq(Seq(
           resolvedIdentifier.catalog.name(),
-          resolvedIdentifier.identifier.namespace().head)
+          resolvedIdentifier.identifier.namespace().head)),
+        origin
       )
     }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala
new file mode 100644
index 0000000000000..ed9333d3a27a2
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala
@@ -0,0 +1,975 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import java.util.UUID
+
+import org.apache.spark.SparkRuntimeException
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.{
+  CollectList,
+  Count,
+  First,
+  Last,
+  Max,
+  Min
+}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
+import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogContext}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation}
+import org.apache.spark.sql.streaming.{OutputMode, StatefulProcessor}
+import org.apache.spark.sql.types.{BooleanType, DataType, IntegerType, MetadataBuilder, StringType, StructField, StructType}
+import org.apache.spark.unsafe.types.CalendarInterval
+
+/**
+ * Post-processes a resolved [[ChangelogTable]] read to apply CDC option semantics
+ * (carry-over removal, update detection, net change computation) and to enforce
+ * supported option combinations.
+ *
+ * Fires after [[ResolveRelations]] has wrapped the connector's [[Changelog]] in a
+ * [[ChangelogTable]]. Both batch ([[DataSourceV2Relation]]) and streaming
+ * ([[StreamingRelationV2]]) reads are handled:
+ *   - Batch: the requested post-processing passes are injected as logical operators on top
+ *     of the relation. Carry-over removal and update detection are fused into a single
+ *     pass over a (rowId, _commit_version)-partitioned Window: the Filter drops CoW
+ *     carry-over pairs (same rowVersion on both sides) and the subsequent Project relabels
+ *     real delete+insert pairs as update_preimage / update_postimage. Net change
+ *     computation runs on top of that, collapsing intermediate states per `rowId`.
+ *   - Streaming: row-level passes (carry-over removal and update detection) are supported
+ *     by rewriting the same logic in streaming-allowed primitives -- an
+ *     [[EventTimeWatermark]] on `_commit_timestamp`, a stateful [[Aggregate]] keyed by
+ *     `(rowId, _commit_version, _commit_timestamp)` that buffers events into an array, an
+ *     optional [[Filter]] for carry-over removal, a [[Generate]] using `Inline` to
+ *     re-emit the buffered events as rows, and an optional relabel [[Project]] for
+ *     update detection. Net change computation is supported by delegating per-row-identity
+ *     state management to a [[CdcNetChangesStatefulProcessor]] driven by
+ *     [[TransformWithState]] -- the processor keeps the first and last event observed for
+ *     each row identity and emits the SPIP collapse output when the global watermark
+ *     advances past the last `_commit_timestamp` seen for that key. Row identities only
+ *     touched in the latest observed commit are held back until a later commit advances
+ *     the watermark or the source terminates. Streams that don't require any
+ *     post-processing pass through unchanged.
+ */
+object ResolveChangelogTable extends Rule[LogicalPlan] {
+
+  /**
+   * Reserved (`__spark_cdc_*`) column names used internally by post-processing;
+   * connectors must not emit columns with these names.
+   */
+  object HelperColumn {
+    final val DelCnt = "__spark_cdc_del_cnt"
+    final val InsCnt = "__spark_cdc_ins_cnt"
+    final val MinRv = "__spark_cdc_min_rv"
+    final val MaxRv = "__spark_cdc_max_rv"
+    final val RvCnt = "__spark_cdc_rv_cnt"
+    // Streaming-only: array of struct buffering all input rows for one (rowId,
+    // _commit_version) group, fed into Generate(Inline(...)) to re-emit per-row.
+    final val Events = "__spark_cdc_events"
+
+    val all: Set[String] = Set(DelCnt, InsCnt, MinRv, MaxRv, RvCnt, Events)
+  }
+
+  /**
+   * Metadata-key marker placed on the `__spark_cdc_events` aggregate's output attribute
+   * by [[addStreamingRowLevelPostProcessing]]. Downstream rules
+   * (`UnsupportedOperationChecker`'s CDC-specific output-mode check) detect the
+   * streaming row-level rewrite by looking for this marker rather than by string-matching
+   * the helper column's name -- mirroring the existing `EventTimeWatermark.delayKey` and
+   * `SessionWindow.marker` patterns and surviving any optimization that might relabel
+   * or rewrite the alias.
+   */
+  final val streamingPostProcessingMarker = "spark.cdc.streamingPostProcessing"
+
+  /**
+   * Reserved (`__spark_cdc_*`) column names used internally by net-change
+   * computation; connectors must not emit columns with these names.
+   */
+  object NetChangesHelperColumns {
+    final val RowNumber = "__spark_cdc_row_number"
+    final val RowCount = "__spark_cdc_row_count"
+    final val FirstRowChangeTypeValue =
+      "__spark_cdc_first_row_change_type_value"
+    final val LastRowChangeTypeValue = "__spark_cdc_last_row_change_type_value"
+    // Streaming-only: rowId expressions are aliased to top-level helper columns named
+    // `__spark_cdc_rowid_<idx>` so they can be referenced as plain Attributes in the
+    // grouping list of `transformWithState`.
+    def rowIdColumn(idx: Int): String = s"__spark_cdc_rowid_$idx"
+
+    val all: Set[String] =
+      Set(RowNumber, RowCount, FirstRowChangeTypeValue, LastRowChangeTypeValue)
+  }
+
+  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
+    case rel @ DataSourceV2Relation(table: ChangelogTable, _, _, _, _, _) if !table.resolved =>
+      val changelog = table.changelog
+      val req = evaluateRequirements(changelog, table.changelogContext)
+
+      val resolvedRel = rel.copy(table = table.copy(resolved = true))
+      var updatedRel: LogicalPlan = resolvedRel
+      if (req.requiresCarryOverRemoval || req.requiresUpdateDetection) {
+        updatedRel = addRowLevelPostProcessing(
+          resolvedRel, changelog, req.requiresCarryOverRemoval, req.requiresUpdateDetection)
+      }
+      if (req.requiresNetChanges) {
+        // Resolve rowId against the bare DataSourceV2Relation. V2ExpressionUtils.resolveRefs
+        // requires a V2-shaped plan; addRowLevelPostProcessing may have wrapped the relation
+        // in Project/Window, which would break resolution against `updatedRel`. Catalyst
+        // preserves these resolved attributes by ExprId through any wrapping operators, so
+        // they remain valid references for the netChanges Window built on top.
+        val rowIdExprs =
+          V2ExpressionUtils.resolveRefs[NamedExpression](changelog.rowId().toSeq, resolvedRel)
+        updatedRel = injectNetChangeComputation(
+          updatedRel, rowIdExprs, table.changelogContext.computeUpdates())
+      }
+      updatedRel
+
+    case rel @ StreamingRelationV2(_, _, table: ChangelogTable, _, _, _, _, _, _)
+        if !table.resolved =>
+      val changelog = table.changelog
+      val req = evaluateRequirements(changelog, table.changelogContext)
+      val resolvedRel = rel.copy(table = table.copy(resolved = true))
+      var updatedRel: LogicalPlan = resolvedRel
+      if (req.requiresCarryOverRemoval || req.requiresUpdateDetection) {
+        updatedRel = addStreamingRowLevelPostProcessing(
+          resolvedRel, changelog, req.requiresCarryOverRemoval, req.requiresUpdateDetection)
+      }
+      if (req.requiresNetChanges) {
+        // Resolve the rowId references against `updatedRel` (the post-row-level plan)
+        // rather than the bare `resolvedRel`. The streaming row-level rewrite uses
+        // Aggregate + Generate(Inline), neither of which preserves the original
+        // attribute ExprIds for the inlined columns; resolving against `resolvedRel`
+        // yields stale ExprIds that fail post-analysis attribute resolution. The
+        // row-level rewrite preserves the connector's schema (column names) on its
+        // output, so name-based resolution against `updatedRel` recovers the right
+        // attributes regardless of any preceding wrapping.
+        updatedRel = addStreamingNetChangeComputation(
+          updatedRel, changelog, table.changelogContext.computeUpdates())
+      }
+      updatedRel
+  }
+
+  // ---------------------------------------------------------------------------
+  // Option validation & Requirement Computation
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Captures which post-processing passes a CDC query requires, derived from the
+   * user-provided [[ChangelogContext]] and the connector-declared [[Changelog]]
+   * capability flags.
+   */
+  private case class PostProcessingRequirements(
+      requiresCarryOverRemoval: Boolean,
+      requiresUpdateDetection: Boolean,
+      requiresNetChanges: Boolean) {
+    def needsAny: Boolean =
+      requiresCarryOverRemoval || requiresUpdateDetection || requiresNetChanges
+  }
+
+  /**
+   * Validates CDC option/capability combinations and computes which post-processing
+   * passes are required. Throws an [[org.apache.spark.sql.AnalysisException]] for
+   * unsupported or contradictory combinations (currently: `computeUpdates` with
+   * surfaced carry-overs but no carry-over removal).
+   */
+  private def evaluateRequirements(
+      changelog: Changelog,
+      context: ChangelogContext): PostProcessingRequirements = {
+    val requiresCarryOverRemoval =
+      context.deduplicationMode() != ChangelogContext.DeduplicationMode.NONE &&
+        changelog.containsCarryoverRows()
+    val requiresUpdateDetection =
+      context.computeUpdates() && changelog.representsUpdateAsDeleteAndInsert()
+    val requiresNetChanges =
+      context.deduplicationMode() == ChangelogContext.DeduplicationMode.NET_CHANGES &&
+        changelog.containsIntermediateChanges()
+
+    // If carry-overs are surfaced and update detection is enabled without carry-over
+    // removal, carry-overs would be falsely classified as updates, leading to wrong
+    // results. Hence we throw.
+    if (requiresUpdateDetection &&
+        changelog.containsCarryoverRows() &&
+        context.deduplicationMode() == ChangelogContext.DeduplicationMode.NONE) {
+      throw QueryCompilationErrors.cdcUpdateDetectionRequiresCarryOverRemoval(
+        changelog.name())
+    }
+
+    PostProcessingRequirements(
+      requiresCarryOverRemoval, requiresUpdateDetection, requiresNetChanges)
+  }
+
+  // ---------------------------------------------------------------------------
+  // Row Level Post Processing (Update Detection & Carry-over Removal)
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Adds row-level post-processing (carry-over removal and/or update detection) on top of
+   * the given plan. `counts` = per-partition delete and insert change_type row counts over
+   * `(rowId, _commit_version)`. `rv bounds` = per-partition min/max of `rowVersion`.
+   * Equal bounds signal a copy-on-write carry-over.
+   *   - both active     -> Window(counts + rv bounds) -> Filter -> Project(relabel) -> Drop helpers
+   *   - carry-over only -> Window(counts + rv bounds) -> Filter -> Drop helpers
+   *   - update only     -> Window(counts only) -> Project(relabel) -> Drop helpers
+   *   - neither         -> not invoked (caller guards this case)
+   */
+  private def addRowLevelPostProcessing(
+      plan: LogicalPlan,
+      cl: Changelog,
+      requiresCarryOverRemoval: Boolean,
+      requiresUpdateDetection: Boolean): LogicalPlan = {
+    // Row-version bounds in the Window are needed iff we filter carry-over pairs.
+    var modifiedPlan = addPostProcessingWindow(plan, cl,
+      includeRowVersionBounds = requiresCarryOverRemoval)
+    if (requiresCarryOverRemoval) modifiedPlan = addCarryOverPairFilter(modifiedPlan)
+    if (requiresUpdateDetection) modifiedPlan = addUpdateRelabelProjection(modifiedPlan)
+    removeHelperColumns(modifiedPlan)
+  }
+
+  /**
+   * Streaming counterpart of [[addRowLevelPostProcessing]].
+   *
+   * ==Why a different shape from the batch path?==
+   *
+   * The batch rewrite is Window-based:
+   * {{{
+   *   DataSourceV2Relation
+   *     -> Window partitioned by (rowId..., _commit_version)
+   *     -> [Filter (carry-over)]
+   *     -> [Project (update relabel)]
+   *     -> Project (drop helper columns)
+   * }}}
+   * [[org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker]] rejects
+   * `Window` on streaming queries (`NON_TIME_WINDOW_NOT_SUPPORTED_IN_STREAMING`).
+   * Replacing it with a plain [[Aggregate]] is not enough on its own: an aggregate
+   * collapses each group to a single row, losing the per-input rows we still need to
+   * relabel/filter; and an append-mode streaming aggregate without an event-time
+   * watermark on a grouping key is itself rejected by the checker.
+   *
+   * ==The rewritten plan==
+   *
+   * Two adjustments over the naive substitution: (a) inject an [[EventTimeWatermark]]
+   * on `_commit_timestamp` (zero delay) so the aggregate is legal in append mode, and
+   * (b) buffer every input row of a group as `Inline`-able structs and re-explode after
+   * the aggregate so no rows are lost.
+   * {{{
+   *   DataSourceV2Relation
+   *     -> Filter (RaiseError on NULL _commit_timestamp)
+   *     -> EventTimeWatermark(_commit_timestamp, 0s)
+   *     -> Aggregate
+   *          group by (rowId..., _commit_version, _commit_timestamp)
+   *          aggs    : _del_cnt, _ins_cnt
+   *                    [, _min_rv, _max_rv, _rv_cnt  (carry-over removal only)]
+   *                    , __spark_cdc_events = collect_list(struct(*))
+   *     -> [Filter (carry-over: _del_cnt=1 AND _ins_cnt=1
+   *                             AND _rv_cnt=2 AND _min_rv=_max_rv)]
+   *     -> Generate(Inline(__spark_cdc_events))   // re-emit one row per buffered input
+   *     -> [Project (update relabel)]
+   *     -> Project (drop helper columns)
+   *     -> Project (strip internal EventTimeWatermark metadata)
+   * }}}
+   *
+   * ==Runtime walkthrough==
+   *
+   * Append-mode streaming aggregates emit a group when its event-time grouping key
+   * falls at or below the global watermark (eviction predicate `eventTime <= watermark`,
+   * applied at the start of the next micro-batch). Suppose three commits with
+   * `_commit_timestamp` 10, 20, 30 each arrive in their own micro-batch:
+   * {{{
+   *   batch  max _ts seen  watermark after batch  groups emitted by this batch
+   *   -----  ------------  ---------------------  ----------------------------
+   *     1         10                10            <none>
+   *     2         20                20            groups with _commit_timestamp == 10
+   *     3         30                30            groups with _commit_timestamp == 20
+   *   end-of-stream final flush                   groups with _commit_timestamp == 30
+   * }}}
+   * Because every row of a single commit shares the same `_commit_timestamp` (CDC
+   * contract), advancing past commit T releases every group whose grouping
+   * `_commit_timestamp` equals T -- one commit's worth of post-processed output per
+   * micro-batch, with the final commit flushed on stream termination.
+   *
+   * ==Per-operator detail==
+   *
+   *  0. [[Filter]] guarding against NULL `_commit_timestamp` -- raises
+   *     `CHANGELOG_CONTRACT_VIOLATION.NULL_COMMIT_TIMESTAMP` for any row that
+   *     violates the contract. A NULL would never satisfy the downstream Aggregate's
+   *     `eventTime <= watermark` eviction predicate (NULL is silent in MAX, never
+   *     compares less-than-or-equal), so its group would be held in state forever.
+   *     Failing fast surfaces the connector bug instead of producing no output.
+   *  1. [[EventTimeWatermark]] on `_commit_timestamp` (zero delay) -- required so the
+   *     downstream stateful aggregate can emit groups in append output mode. By CDC
+   *     contract every row in a single commit shares `_commit_timestamp`, so taking it
+   *     as event time is safe.
+   *  2. [[Aggregate]] keyed by `(rowId..., _commit_version, _commit_timestamp)`. Computes
+   *     the same `_del_cnt` / `_ins_cnt` / (`_min_rv` / `_max_rv` / `_rv_cnt`) helpers as
+   *     the batch path, plus an `__spark_cdc_events` array-of-struct buffering every
+   *     input row of the group. `_commit_timestamp` is included in the grouping keys
+   *     (besides being a no-op given the contract) to satisfy
+   *     [[org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker]]'s
+   *     requirement that the watermark attribute appear among grouping expressions for
+   *     append-mode streaming aggregations.
+   *  3. [[Filter]] (only when carry-over removal is requested) on the same predicate as
+   *     the batch path -- groups with `_del_cnt = 1 AND _ins_cnt = 1 AND _rv_cnt = 2 AND
+   *     _min_rv = _max_rv` are dropped wholesale.
+   *  4. [[Generate]] using `Inline(events)` to re-emit one output row per buffered input
+   *     row. `unrequiredChildIndex` drops the duplicate grouping columns and the events
+   *     buffer; the helper count columns flow through.
+   *  5. [[Project]] (only when update detection is requested) applying the same
+   *     `CHANGELOG_CONTRACT_VIOLATION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION`
+   *     guard and `_change_type` relabel as the batch path.
+   *  6. [[Project]] (via [[removeHelperColumns]]) drops `__spark_cdc_*` helpers so
+   *     the output schema matches the connector's declared schema.
+   *  7. Final [[Project]] (via [[stripCommitTimestampWatermarkMetadata]]) clears the
+   *     `EventTimeWatermark.delayKey` from the user-visible `_commit_timestamp`
+   *     attribute so a downstream user-supplied `withWatermark` on a different column
+   *     does not interact with our internal watermark via the global multi-watermark
+   *     policy.
+   */
+  private def addStreamingRowLevelPostProcessing(
+      plan: LogicalPlan,
+      cl: Changelog,
+      requiresCarryOverRemoval: Boolean,
+      requiresUpdateDetection: Boolean): LogicalPlan = {
+    // Fail fast on a NULL `_commit_timestamp`. The downstream Aggregate uses it as
+    // both an event-time watermark column and a grouping key; a NULL group-key value
+    // would never satisfy the `eventTime <= watermark` eviction predicate, so the
+    // group would silently stall (held in state until end of stream). Mirrors the
+    // runtime check in [[CdcNetChangesStatefulProcessor]] -- fail fast at the
+    // contract violation rather than producing no output.
+    val plan1 = addNullCommitTimestampGuard(plan)
+    val rawCommitTsAttr = getAttribute(plan1, "_commit_timestamp")
+    val watermarked = EventTimeWatermark(
+      UUID.randomUUID(), rawCommitTsAttr, new CalendarInterval(0, 0, 0L), plan1)
+
+    val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](
+      cl.rowId().toSeq, watermarked)
+    val commitVersionAttr = getAttribute(watermarked, "_commit_version")
+    // Pick up the post-watermark `_commit_timestamp` attribute -- it carries the
+    // EventTimeWatermark.delayKey metadata that UnsupportedOperationChecker scans for.
+    val commitTimestampAttr = getAttribute(watermarked, "_commit_timestamp")
+    val changeTypeAttr = getAttribute(watermarked, "_change_type")
+
+    val groupingExprs: Seq[Expression] =
+      rowIdExprs ++ Seq(commitVersionAttr, commitTimestampAttr)
+    val groupingNamedExprs: Seq[NamedExpression] =
+      groupingExprs.map(_.asInstanceOf[NamedExpression])
+
+    val insertIf = If(EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_INSERT)),
+      Literal(1), Literal(null, IntegerType))
+    val deleteIf = If(EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_DELETE)),
+      Literal(1), Literal(null, IntegerType))
+    val delCntAlias = Alias(
+      Count(Seq(deleteIf)).toAggregateExpression(), HelperColumn.DelCnt)()
+    val insCntAlias = Alias(
+      Count(Seq(insertIf)).toAggregateExpression(), HelperColumn.InsCnt)()
+
+    val rvAliases = if (requiresCarryOverRemoval) {
+      val rowVersionExpr = V2ExpressionUtils.resolveRef[NamedExpression](
+        cl.rowVersion(), watermarked)
+      Seq(
+        Alias(Min(rowVersionExpr).toAggregateExpression(), HelperColumn.MinRv)(),
+        Alias(Max(rowVersionExpr).toAggregateExpression(), HelperColumn.MaxRv)(),
+        Alias(Count(Seq(rowVersionExpr)).toAggregateExpression(), HelperColumn.RvCnt)())
+    } else Seq.empty
+
+    // Buffer every input row as a struct so Inline can re-emit them after the aggregate.
+    // The grouping-key columns (rowId..., `_commit_version`, `_commit_timestamp`) appear
+    // both inside the struct and as top-level grouping outputs; the top-level duplicates
+    // are dropped via `unrequiredChildIndex` below.
+    val structOfAllCols = CreateStruct(watermarked.output)
+    // Attach a metadata marker to the `__spark_cdc_events` alias so downstream rules
+    // can detect the streaming row-level rewrite by metadata rather than by helper
+    // column name (mirrors `SessionWindow.marker` / `EventTimeWatermark.delayKey`).
+    val eventsMetadata = new MetadataBuilder()
+      .putBoolean(streamingPostProcessingMarker, true)
+      .build()
+    val eventsAlias = Alias(
+      new CollectList(structOfAllCols).toAggregateExpression(), HelperColumn.Events)(
+      explicitMetadata = Some(eventsMetadata))
+
+    val aggregateExprs: Seq[NamedExpression] =
+      groupingNamedExprs ++ Seq(delCntAlias, insCntAlias) ++ rvAliases :+ eventsAlias
+    val aggregated = Aggregate(groupingExprs, aggregateExprs, watermarked)
+
+    val filtered: LogicalPlan = if (requiresCarryOverRemoval) {
+      Filter(Not(buildCarryOverPairPredicate(aggregated)), aggregated)
+    } else aggregated
+
+    // Inline the struct array back into rows. Drop the events column (consumed by Inline)
+    // and the grouping-key columns (re-emitted from inside the struct) so the final shape
+    // matches the connector's schema plus the surviving helper count columns.
+    val eventsAttr = getAttribute(filtered, HelperColumn.Events)
+    val groupingAttrSet = AttributeSet(groupingNamedExprs.map(_.toAttribute))
+    val unrequiredChildIndex: Seq[Int] = filtered.output.zipWithIndex.collect {
+      case (a, i) if a.exprId == eventsAttr.exprId => i
+      case (a, i) if groupingAttrSet.contains(a) => i
+    }
+    val generatorOutput: Seq[Attribute] = watermarked.output.map { col =>
+      AttributeReference(col.name, col.dataType, col.nullable, col.metadata)()
+    }
+    val generated = Generate(
+      Inline(eventsAttr),
+      unrequiredChildIndex = unrequiredChildIndex,
+      outer = false,
+      qualifier = None,
+      generatorOutput = generatorOutput,
+      child = filtered)
+
+    val withRelabel: LogicalPlan = if (requiresUpdateDetection) {
+      addUpdateRelabelProjection(generated)
+    } else generated
+
+    // Strip the auto-injected EventTimeWatermark metadata from the user-visible
+    // `_commit_timestamp` so it does not interact with downstream user-supplied
+    // watermarks via the global multi-watermark policy. The metadata flows through
+    // Generate(Inline) (which copies attribute metadata) and the relabel Project, so
+    // it must be cleared here at the boundary of the rewrite.
+    val cleaned = stripCommitTimestampWatermarkMetadata(withRelabel)
+    removeHelperColumns(cleaned)
+  }
+
+  /**
+   * Adds a `Filter` that raises
+   * `CHANGELOG_CONTRACT_VIOLATION.NULL_COMMIT_TIMESTAMP` for any input row whose
+   * `_commit_timestamp` is `NULL`. Used as the first step of the streaming row-level
+   * rewrite so a contract-violating connector fails fast instead of silently stalling
+   * the downstream stateful aggregate's group.
+   */
+  private def addNullCommitTimestampGuard(input: LogicalPlan): LogicalPlan = {
+    val commitTsAttr = getAttribute(input, "_commit_timestamp")
+    // Use a dedicated, side-effecting catalyst expression rather than a
+    // `CaseWhen(IsNull(c) -> RaiseError, true)` predicate. Spark's
+    // `NullPropagation` rule rewrites `IsNull(c)` to `false` whenever `c.nullable`
+    // is `false` and similarly eliminates `AssertNotNull(c)` for non-nullable `c`
+    // (`expressions.scala:920-926`). A connector can reasonably declare
+    // `_commit_timestamp` as non-nullable in its schema while still emitting NULL
+    // at runtime in violation of the contract -- under those rules the guard
+    // would be optimized away and the runtime NULL would silently stall the
+    // group. `CdcAssertCommitTimestampNotNull` is unrecognised by
+    // `NullPropagation` and stays in the plan regardless of the column's
+    // declared nullability, surfacing the violation immediately.
+    Filter(CdcAssertCommitTimestampNotNull(commitTsAttr), input)
+  }
+
+  /**
+   * Final boundary for the streaming row-level rewrite: rebuilds the user-visible
+   * `_commit_timestamp` attribute with empty watermark-related metadata. Other
+   * attributes flow through unchanged.
+   */
+  private def stripCommitTimestampWatermarkMetadata(input: LogicalPlan): LogicalPlan = {
+    val projectList: Seq[NamedExpression] = input.output.map { attr =>
+      if (attr.name == "_commit_timestamp" &&
+          attr.metadata.contains(EventTimeWatermark.delayKey)) {
+        val cleanedMetadata = new MetadataBuilder()
+          .withMetadata(attr.metadata)
+          .remove(EventTimeWatermark.delayKey)
+          .build()
+        Alias(attr.withMetadata(cleanedMetadata), attr.name)(
+          exprId = attr.exprId,
+          qualifier = attr.qualifier)
+      } else {
+        attr
+      }
+    }
+    Project(projectList, input)
+  }
+
+  /**
+   * Adds a Window node partitioned by (rowId, _commit_version) that computes
+   * `_del_cnt` and `_ins_cnt` per partition, and, when `includeRowVersionBounds`
+   * is true, additionally `_min_rv` / `_max_rv` / `_rv_cnt` (min, max and non-null
+   * count of `Changelog.rowVersion()`).
+   *
+   * `_del_cnt` / `_ins_cnt` drive update detection (1 each -> relabel as
+   * update_preimage / update_postimage). `_min_rv` / `_max_rv` / `_rv_cnt` drive
+   * carry-over detection (within a delete+insert pair, `_rv_cnt = 2` AND equal
+   * bounds signal a CoW carry-over).
+   */
+  private def addPostProcessingWindow(
+      plan: LogicalPlan,
+      cl: Changelog,
+      includeRowVersionBounds: Boolean): LogicalPlan = {
+    val changeTypeAttr = getAttribute(plan, "_change_type")
+    val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan)
+    val commitVersionAttr = getAttribute(plan, "_commit_version")
+    val partitionByCols = rowIdExprs ++ Seq(commitVersionAttr)
+    val windowSpec = WindowSpecDefinition(partitionByCols, Nil, UnspecifiedFrame)
+
+    val insertIf = If(EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_INSERT)),
+      Literal(1), Literal(null, IntegerType))
+    val deleteIf = If(EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_DELETE)),
+      Literal(1), Literal(null, IntegerType))
+
+    val insCntAlias = Alias(WindowExpression(
+      Count(Seq(insertIf)).toAggregateExpression(), windowSpec), HelperColumn.InsCnt)()
+    val delCntAlias = Alias(WindowExpression(
+      Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), HelperColumn.DelCnt)()
+    val baseAliases = Seq(delCntAlias, insCntAlias)
+    val rowVersionAliases = if (includeRowVersionBounds) {
+      val rowVersionExpr =
+        V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan)
+      Seq(
+        Alias(WindowExpression(
+          Min(rowVersionExpr).toAggregateExpression(), windowSpec), HelperColumn.MinRv)(),
+        Alias(WindowExpression(
+          Max(rowVersionExpr).toAggregateExpression(), windowSpec), HelperColumn.MaxRv)(),
+        Alias(WindowExpression(
+          Count(Seq(rowVersionExpr)).toAggregateExpression(), windowSpec), HelperColumn.RvCnt)())
+    } else {
+      Seq.empty
+    }
+    Window(baseAliases ++ rowVersionAliases, partitionByCols, Nil, plan)
+  }
+
+  /**
+   * Builds the carry-over pair predicate against the helper attributes exposed by
+   * `input`: a pair is a CoW carry-over iff
+   * `_del_cnt = 1 AND _ins_cnt = 1 AND _rv_cnt = 2 AND _min_rv = _max_rv`. The
+   * `_rv_cnt = 2` clause guards against a NULL rowVersion silently matching
+   * `_min_rv = _max_rv` (Spark's min/max skip NULLs).
+   *
+   * Used by both the batch path (`addCarryOverPairFilter` over a Window child) and the
+   * streaming path (in `addStreamingRowLevelPostProcessing` over an Aggregate child).
+   * The helper-attribute layout is the same in both cases.
+   */
+  private def buildCarryOverPairPredicate(input: LogicalPlan): Expression = {
+    val delCnt = getAttribute(input, HelperColumn.DelCnt)
+    val insCnt = getAttribute(input, HelperColumn.InsCnt)
+    val minRv = getAttribute(input, HelperColumn.MinRv)
+    val maxRv = getAttribute(input, HelperColumn.MaxRv)
+    val rvCnt = getAttribute(input, HelperColumn.RvCnt)
+    And(
+      And(EqualTo(delCnt, Literal(1L)), EqualTo(insCnt, Literal(1L))),
+      And(EqualTo(rvCnt, Literal(2L)), EqualTo(minRv, maxRv)))
+  }
+
+  /**
+   * Adds a Filter node that drops rows belonging to a CoW carry-over pair, using the
+   * shared `buildCarryOverPairPredicate`.
+   */
+  private def addCarryOverPairFilter(input: LogicalPlan): LogicalPlan = {
+    Filter(Not(buildCarryOverPairPredicate(input)), input)
+  }
+
+  /**
+   * Adds a Project node that rewrites `_change_type` to `update_preimage` /
+   * `update_postimage` whenever a delete+insert pair is present in the partition.
+   * Expects the input to expose `_del_cnt` and `_ins_cnt`.
+   */
+  private def addUpdateRelabelProjection(input: LogicalPlan): LogicalPlan = {
+    val changeTypeAttr = getAttribute(input, "_change_type")
+    val delCnt = getAttribute(input, HelperColumn.DelCnt)
+    val insCnt = getAttribute(input, HelperColumn.InsCnt)
+
+    val isUpdate = And(
+      EqualTo(delCnt, Literal(1L)),
+      EqualTo(insCnt, Literal(1L)))
+    val isInvalid = Or(GreaterThan(delCnt, Literal(1L)), GreaterThan(insCnt, Literal(1L)))
+    val updateType = If(EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_INSERT)),
+      Literal(Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE),
+      Literal(Changelog.CHANGE_TYPE_UPDATE_PREIMAGE))
+
+    val raiseInvalid = RaiseError(
+      Literal("CHANGELOG_CONTRACT_VIOLATION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION"),
+      CreateMap(Nil),
+      StringType)
+    val caseExpr = CaseWhen(Seq(isInvalid -> raiseInvalid, isUpdate -> updateType), changeTypeAttr)
+
+    val projectList = input.output.map { attr =>
+      if (attr.name == "_change_type") Alias(caseExpr, "_change_type")()
+      else attr
+    }
+    Project(projectList, input)
+  }
+
+  // ---------------------------------------------------------------------------
+  // Net Change Computation
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Collapses multiple changes per row identity across versions into the net effect:
+   *
+   * | existedBefore | existsAfter | output                              |
+   * |---------------|-------------|-------------------------------------|
+   * | false         | false       | (cancel)                            |
+   * | false         | true        | insert                              |
+   * | true          | false       | delete                              |
+   * | true          | true        | update_preimage + update_postimage  |
+   *
+   * If `computeUpdates = false`, the `update_preimage` + `update_postimage` pair is
+   * emitted as `delete` + `insert` instead.
+   *
+   * `existedBefore` is true iff the partition's first event is `delete` or
+   * `update_preimage`. `existsAfter` is true iff the partition's last event is
+   * `insert` or `update_postimage`.
+   *
+   * Pipeline: Window (per-rowId aggregates, sort by version) -> Filter (keep first/last per
+   * partition) -> Project (relabel `_change_type` and drop helper columns).
+   */
+  private def injectNetChangeComputation(
+      plan: LogicalPlan,
+      rowIdExprs: Seq[NamedExpression],
+      computeUpdates: Boolean): LogicalPlan = {
+    val windowedPlan = addNetChangesWindow(plan, rowIdExprs)
+    val filteredAndRelabeledPlan =
+      removeIntermediateChangelogEntriesAndRelabelChangeTypes(windowedPlan, computeUpdates)
+    filteredAndRelabeledPlan
+  }
+
+  /**
+   * Streaming counterpart of [[injectNetChangeComputation]]. The batch version uses a
+   * Catalyst `Window` partitioned by `rowId`, which is rejected on streaming queries.
+   * This version delegates the per-`rowId` first/last extraction and the SPIP collapse
+   * matrix to a [[CdcNetChangesStatefulProcessor]] driven by `transformWithState`:
+   *
+   *  1. [[EventTimeWatermark]] on `_commit_timestamp` (zero delay) so the global query
+   *     watermark advances with each batch. When this rewrite runs on top of the row-level
+   *     post-processing rewrite (combined `containsCarryoverRows` /
+   *     `representsUpdateAsDeleteAndInsert` + `containsIntermediateChanges` path), the
+   *     row-level rewrite has already injected an identical `EventTimeWatermark` and we
+   *     reuse it instead of stacking a second one. Stacking watermarks on the same column
+   *     fails the multi-watermark check unless `STATEFUL_OPERATOR_ALLOW_MULTIPLE` is set,
+   *     and even then it would just produce two redundant nodes.
+   *  2. [[Project]] that aliases each rowId expression to a top-level helper column. This
+   *     lets us address the rowId as an `Attribute` for the `transformWithState` grouping,
+   *     which in turn makes nested rowId paths (e.g. `payload.id`) work without special
+   *     casing.
+   *  3. [[TransformWithState]] keyed by the rowId helper attributes, in
+   *     [[org.apache.spark.sql.catalyst.plans.logical.EventTime]] mode. The processor
+   *     buffers the first and last event per row identity; an event-time timer set to the
+   *     latest observed `_commit_timestamp` fires once the global watermark advances past
+   *     it, at which point the processor evaluates the SPIP `(existedBefore, existsAfter)`
+   *     matrix and emits 0, 1, or 2 output rows.
+   *  4. [[SerializeFromObject]] (added by the `transformWithState` factory) brings the
+   *     processor's `Row` outputs back into a regular tabular shape.
+   *  5. [[Project]] (via [[stripCommitTimestampWatermarkMetadata]]) clears the
+   *     auto-injected `EventTimeWatermark.delayKey` metadata from the user-visible
+   *     `_commit_timestamp`. The metadata is preserved through the `transformWithState`
+   *     encoder roundtrip and would otherwise interact with downstream user-supplied
+   *     watermarks via the global multi-watermark policy.
+   *  6. Final [[Project]] drops the rowId helper columns so the user-visible schema
+   *     matches the connector's declared changelog schema.
+   *
+   * Streaming netChanges is incremental, not range-scoped: per-row-identity state is
+   * cleared on emission, so a later commit on the same identity starts a fresh window
+   * and produces additional output rows. Batch netChanges over the same version range
+   * would have collapsed those changes; streaming cannot retract already-emitted rows
+   * to match that. End-of-stream flushes all pending timers, so a bounded stream's
+   * output matches batch only when no row identity is touched again after its first
+   * emission.
+   */
+  private def addStreamingNetChangeComputation(
+      plan: LogicalPlan,
+      cl: Changelog,
+      computeUpdates: Boolean): LogicalPlan = {
+    // 1. Inject (or reuse, if already injected by the row-level rewrite) a watermark on
+    //    `_commit_timestamp`. The row-level rewrite already adds one with zero delay, so
+    //    we only add it when no watermark is present in the lineage to avoid stacking
+    //    EventTimeWatermark nodes (which is rejected by the multi-watermark check
+    //    unless STATEFUL_OPERATOR_ALLOW_MULTIPLE is set).
+    val needsWatermark = !plan.exists {
+      case _: EventTimeWatermark => true
+      case _ => false
+    }
+    val watermarked: LogicalPlan = if (needsWatermark) {
+      val rawCommitTsAttr = getAttribute(plan, "_commit_timestamp")
+      EventTimeWatermark(
+        UUID.randomUUID(), rawCommitTsAttr, new CalendarInterval(0, 0, 0L), plan)
+    } else plan
+
+    // 2. Resolve rowId expressions against the watermarked plan. Resolving here (after
+    //    any preceding row-level rewrite) ensures the attribute ExprIds match the
+    //    columns in `plan.output` -- name-based resolution recovers them by their
+    //    connector-declared names. Then project them to top-level helper columns so
+    //    they can be referenced as plain Attributes by `transformWithState`'s grouping.
+    val rowIdExprs =
+      V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, watermarked)
+    val rowIdHelpers: Seq[Alias] = rowIdExprs.zipWithIndex.map { case (expr, idx) =>
+      Alias(expr, NetChangesHelperColumns.rowIdColumn(idx))()
+    }
+    val originalCols: Seq[Attribute] = watermarked.output
+    val withHelpers = Project(originalCols ++ rowIdHelpers, watermarked)
+
+    // 3. Build the input/output Row encoder for the processor. The schema is the
+    //    watermarked plan's schema plus the rowId helper columns.
+    val processorInputSchema = StructType(
+      withHelpers.output.map { a =>
+        StructField(a.name, a.dataType, a.nullable, a.metadata)
+      })
+    val rowEncoder = ExpressionEncoder(processorInputSchema)
+    val groupingAttrs: Seq[Attribute] = rowIdHelpers.map(_.toAttribute)
+    val keyEncoder = ExpressionEncoder(StructType(rowIdHelpers.map { a =>
+      StructField(a.name, a.dataType, a.nullable, a.metadata)
+    }))
+
+    val processor = new CdcNetChangesStatefulProcessor(processorInputSchema, computeUpdates)
+
+    val tws = new TransformWithState(
+      keyDeserializer = UnresolvedDeserializer(keyEncoder.deserializer, groupingAttrs),
+      valueDeserializer = UnresolvedDeserializer(rowEncoder.deserializer, withHelpers.output),
+      groupingAttributes = groupingAttrs,
+      dataAttributes = withHelpers.output,
+      statefulProcessor = processor.asInstanceOf[StatefulProcessor[Any, Any, Any]],
+      timeMode = EventTime,
+      outputMode = OutputMode.Append(),
+      keyEncoder = keyEncoder.asInstanceOf[ExpressionEncoder[Any]],
+      outputObjAttr = CatalystSerde.generateObjAttr(rowEncoder),
+      child = withHelpers,
+      hasInitialState = false,
+      initialStateGroupingAttrs = groupingAttrs,
+      initialStateDataAttrs = withHelpers.output,
+      initialStateDeserializer = UnresolvedDeserializer(keyEncoder.deserializer, groupingAttrs),
+      initialState = LocalRelation(keyEncoder.schema))
+
+    // 4. Wrap with SerializeFromObject so the obj column becomes regular tabular output.
+    val serialized = CatalystSerde.serialize(tws)(rowEncoder)
+
+    // 5. Strip the auto-injected EventTimeWatermark metadata from the user-visible
+    //    `_commit_timestamp`. The metadata is preserved through the `transformWithState`
+    //    encoder roundtrip (the encoder schema carries StructField metadata), so we
+    //    must clear it here at the boundary of the rewrite -- otherwise downstream
+    //    user-supplied watermarks would interact with our internal watermark via the
+    //    global multi-watermark policy. Mirrors the row-level path's call at the end
+    //    of `addStreamingRowLevelPostProcessing`.
+    val cleaned = stripCommitTimestampWatermarkMetadata(serialized)
+
+    // 6. Drop the rowId helper columns so the final output matches the connector's schema.
+    val helperNames = rowIdHelpers.map(_.name).toSet
+    Project(cleaned.output.filterNot(a => helperNames.contains(a.name)), cleaned)
+  }
+
+  /**
+   * Adds a Window node partitioned by `rowId` and ordered by
+   * `(_commit_version, change_type_rank)` where pre-events (`update_preimage`,
+   * `delete`) sort before post-events (`update_postimage`, `insert`) within the same
+   * commit. Computes per-partition helper columns:
+   *   - `__spark_cdc_row_number` (1..n) answers: "is this the first or last row?".
+   *   - `__spark_cdc_row_count` is the partition size which combined with row_number is
+   *     used to detect the last row.
+   *   - `__spark_cdc_first_row_change_type_value` and
+   *     `__spark_cdc_last_row_change_type_value` drive the first/last classification at
+   *     filter and relabel time.
+   */
+  private def addNetChangesWindow(
+      plan: LogicalPlan,
+      rowIdExprs: Seq[NamedExpression]): LogicalPlan = {
+    val changeTypeAttr = getAttribute(plan, "_change_type")
+    val commitVersionAttr = getAttribute(plan, "_commit_version")
+    val raiseUnexpectedChangeType = RaiseError(
+      Literal("CHANGELOG_CONTRACT_VIOLATION.UNEXPECTED_CHANGE_TYPE"),
+      CreateMap(Nil),
+      IntegerType)
+    val changeTypeRank = CaseWhen(Seq(
+      EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_UPDATE_PREIMAGE)) -> Literal(0),
+      EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_DELETE)) -> Literal(0),
+      EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_INSERT)) -> Literal(1),
+      EqualTo(changeTypeAttr, Literal(Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE)) -> Literal(1)),
+      raiseUnexpectedChangeType)
+    val partitionByCols = rowIdExprs
+    val orderSpec = Seq(
+      SortOrder(commitVersionAttr, Ascending),
+      SortOrder(changeTypeRank, Ascending))
+    val rowNumberWindowSpec = WindowSpecDefinition(
+      partitionByCols, orderSpec,
+      UnspecifiedFrame)
+    val aggregateWindowSpec = WindowSpecDefinition(
+      partitionByCols, orderSpec,
+      SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing))
+
+    val rowNumberAlias = Alias(
+      WindowExpression(RowNumber(), rowNumberWindowSpec),
+      NetChangesHelperColumns.RowNumber)()
+    val rowCountAlias = Alias(
+      WindowExpression(Count(Seq(Literal(1))).toAggregateExpression(), aggregateWindowSpec),
+      NetChangesHelperColumns.RowCount)()
+    val firstRowChangeTypeValueAlias = Alias(
+      WindowExpression(
+        First(changeTypeAttr, ignoreNulls = false).toAggregateExpression(),
+        aggregateWindowSpec),
+      NetChangesHelperColumns.FirstRowChangeTypeValue)()
+    val lastRowChangeTypeValueAlias = Alias(
+      WindowExpression(
+        Last(changeTypeAttr, ignoreNulls = false).toAggregateExpression(),
+        aggregateWindowSpec),
+      NetChangesHelperColumns.LastRowChangeTypeValue)()
+
+    Window(
+      Seq(rowNumberAlias, rowCountAlias, firstRowChangeTypeValueAlias,
+        lastRowChangeTypeValueAlias),
+      partitionByCols, orderSpec, plan)
+  }
+
+  /**
+   * Filters and relabels the windowed plan: keeps only the first and/or last row per
+   * `rowId` partition, then rewrites the surviving rows' `_change_type` and drops the
+   * helper columns.
+   *
+   * | existedBefore | existsAfter | output                              |
+   * |---------------|-------------|-------------------------------------|
+   * | false         | false       | (cancel)                            |
+   * | false         | true        | insert                              |
+   * | true          | false       | delete                              |
+   * | true          | true        | update_preimage + update_postimage  |
+   *
+   * If `computeUpdates = false`, the `update_preimage` + `update_postimage` pair is
+   * emitted as `delete` + `insert` instead.
+   *
+   * `existedBefore` is true iff the partition's first event is `delete` or
+   * `update_preimage`. `existsAfter` is true iff the partition's last event is
+   * `insert` or `update_postimage`.
+   *
+   * Helper columns (`__spark_cdc_*`) are dropped in the same Project that does the
+   * relabel, saving a follow-up cleanup pass.
+   */
+  private def removeIntermediateChangelogEntriesAndRelabelChangeTypes(
+       windowedPlan: LogicalPlan,
+       computeUpdates: Boolean
+     ): LogicalPlan = {
+    val rowNumberAttr = getAttribute(windowedPlan, NetChangesHelperColumns.RowNumber)
+    val rowCountAttr = getAttribute(windowedPlan, NetChangesHelperColumns.RowCount)
+    val firstRowChangeTypeAttr =
+      getAttribute(windowedPlan, NetChangesHelperColumns.FirstRowChangeTypeValue)
+    val lastRowChangeTypeAttr =
+      getAttribute(windowedPlan, NetChangesHelperColumns.LastRowChangeTypeValue)
+
+    val existedBeforeVersionRange = In(firstRowChangeTypeAttr, Seq(
+      Literal(Changelog.CHANGE_TYPE_DELETE),
+      Literal(Changelog.CHANGE_TYPE_UPDATE_PREIMAGE)))
+    val existsAfterVersionRange = In(lastRowChangeTypeAttr, Seq(
+      Literal(Changelog.CHANGE_TYPE_INSERT),
+      Literal(Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE)))
+
+    val isFirst = EqualTo(rowNumberAttr, Literal(1))
+    val isLast = EqualTo(rowNumberAttr, rowCountAttr)
+
+    // only keep first and last entry per set of changes for a rowId, order of cases is important!
+    val keep = CaseWhen(Seq(
+      // filter out if inserted and deleted within range
+      And(Not(existedBeforeVersionRange), Not(existsAfterVersionRange)) -> Literal(false),
+      // for persisting new row keep only last state
+      And(Not(existedBeforeVersionRange), existsAfterVersionRange) -> isLast,
+      // for previously existing row keep first state
+      And(existedBeforeVersionRange, Not(existsAfterVersionRange)) -> isFirst),
+      // for persisting row keep first and last state
+      // existedBeforeVersionRange = true, existsAfterVersionRange = true
+      Or(isFirst, isLast))
+
+    val filteredPlan = Filter(keep, windowedPlan)
+
+    val computedPreUpdateLabel =
+      if (computeUpdates) Literal(Changelog.CHANGE_TYPE_UPDATE_PREIMAGE)
+      else Literal(Changelog.CHANGE_TYPE_DELETE)
+    val computedPostUpdateLabel =
+      if (computeUpdates) Literal(Changelog.CHANGE_TYPE_UPDATE_POSTIMAGE)
+      else Literal(Changelog.CHANGE_TYPE_INSERT)
+
+    val changeTypeAttr = getAttribute(filteredPlan, "_change_type")
+
+    // Each case relabels the kept row(s) to match the required output label. The tuple
+    // is (first event, last event) of the partition; cases below assume computeUpdates=true.
+    //   Case 1 (insert, update_postimage): keep update_postimage; relabel it to insert.
+    //   Case 2 (update_preimage, delete): keep update_preimage; relabel it to delete.
+    //   Case 3 (delete, update_postimage): keep delete and update_postimage; relabel delete to
+    //           update_preimage.
+    //   Case 4 (update_preimage, insert): keep update_preimage and insert; relabel insert to
+    //           update_postimage.
+    // No-op cases (e.g. (insert, insert)) are not listed. If computeUpdates=false insert/deletes
+    // will be used instead of update_pre/postimage.
+    val relabel = CaseWhen(Seq(
+      And(Not(existedBeforeVersionRange), isLast) -> Literal(Changelog.CHANGE_TYPE_INSERT),
+      And(Not(existsAfterVersionRange), isFirst) -> Literal(Changelog.CHANGE_TYPE_DELETE),
+      And(And(existedBeforeVersionRange, existsAfterVersionRange), isFirst)
+        -> computedPreUpdateLabel,
+      And(And(existedBeforeVersionRange, existsAfterVersionRange), isLast)
+        -> computedPostUpdateLabel),
+      changeTypeAttr)
+
+    val projectList = filteredPlan.output.flatMap { attr =>
+      if (NetChangesHelperColumns.all.contains(attr.name)) None
+      else if (attr.name == "_change_type") Some(Alias(relabel, "_change_type")())
+      else Some(attr)
+    }
+
+    val projectedPlan = Project(projectList, filteredPlan)
+    projectedPlan
+  }
+
+  // ---------------------------------------------------------------------------
+  // Helpers
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Removes any helper columns (see [[HelperColumn]]) that earlier steps added to the
+   * plan. Helper columns not present in the input are silently ignored, so this method
+   * can be applied unconditionally regardless of which post-processing steps ran.
+   */
+  private def removeHelperColumns(input: LogicalPlan): LogicalPlan = {
+    Project(input.output.filterNot(a => HelperColumn.all.contains(a.name)), input)
+  }
+
+  /**
+   * Looks up an attribute by name in a plan's output. Throws a clear error if missing --
+   * used for required columns like `_change_type` / `_commit_version` / helper columns
+   * added by earlier steps; a missing column is always a programming error.
+   */
+  private def getAttribute(plan: LogicalPlan, name: String): Attribute =
+    plan.output.find(_.name == name).getOrElse(
+      throw new IllegalStateException(
+        s"Required column '$name' not found in plan output: " +
+          plan.output.map(_.name).mkString(", ")))
+}
+
+/**
+ * Side-effecting Boolean expression: returns `true` if the child is non-NULL and throws
+ * `CHANGELOG_CONTRACT_VIOLATION.NULL_COMMIT_TIMESTAMP` if the child is NULL. Used as the
+ * predicate of the streaming row-level rewrite's NULL guard `Filter`.
+ *
+ * The point of this dedicated expression is to remain in the plan no matter what the
+ * connector declares for `_commit_timestamp.nullable`: Spark's `NullPropagation` rules
+ * (`Optimizer.scala`'s `expressions.scala:920-926`) rewrite `IsNull(c) -> false` and
+ * eliminate `AssertNotNull(c)` whenever `c.nullable` is `false`. A connector that
+ * declares `_commit_timestamp` non-nullable but emits NULL at runtime would slip past
+ * those simpler shapes; this class is unrecognised by `NullPropagation` so the runtime
+ * check stays put.
+ */
+case class CdcAssertCommitTimestampNotNull(child: Expression)
+    extends UnaryExpression
+    with CodegenFallback
+    with NonSQLExpression {
+
+  override def dataType: DataType = BooleanType
+  override def foldable: Boolean = false
+  override def nullable: Boolean = false
+
+  override def eval(input: InternalRow): Any = {
+    if (child.eval(input) == null) {
+      throw new SparkRuntimeException(
+        errorClass = "CHANGELOG_CONTRACT_VIOLATION.NULL_COMMIT_TIMESTAMP",
+        messageParameters = Map.empty)
+    }
+    true
+  }
+
+  override protected def withNewChildInternal(
+      newChild: Expression): CdcAssertCommitTimestampNotNull =
+    copy(child = newChild)
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala
index 55622637f3046..34942fcf08bcc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveFetchCursor.scala
@@ -34,7 +34,8 @@ class ResolveFetchCursor(val catalogManager: CatalogManager) extends Rule[Logica
   with ColumnResolutionHelper {
   // VariableResolution looks up both scripting local variables (via SqlScriptingContextManager)
   // and session variables (via tempVariableManager), checking local variables first.
-  private val variableResolution = new VariableResolution(catalogManager.tempVariableManager)
+  private val variableResolution =
+    new VariableResolution(catalogManager.tempVariableManager, catalogManager)
 
   /**
    * Checks for duplicate variable names and throws an exception if found.
@@ -63,7 +64,8 @@ class ResolveFetchCursor(val catalogManager: CatalogManager) extends Rule[Logica
           nameParts = u.nameParts
         ) match {
           case Some(variable) => variable.copy(canFold = false)
-          case _ => throw unresolvedVariableError(u.nameParts, Seq("SYSTEM", "SESSION"))
+          case _ => throw unresolvedVariableError(
+            u.nameParts, variableResolution.searchPathEntriesForError, u.origin)
         }
 
       case other => throw SparkException.internalError(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
index 7150c81ad64ec..cfa6f33588062 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
@@ -19,8 +19,9 @@ package org.apache.spark.sql.catalyst.analysis
 
 import scala.collection.mutable
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression, VariableReference}
-import org.apache.spark.sql.catalyst.plans.logical.{CreateView, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{CreateView, InsertIntoStatement, LogicalPlan, V2WriteCommand}
 import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}
 import org.apache.spark.sql.catalyst.trees.TreePattern._
 import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -70,6 +71,39 @@ class ResolveIdentifierClause(earlyBatches: Seq[RuleExecutor[LogicalPlan]#Batch]
 
         executor.execute(p.planBuilder.apply(
           IdentifierResolution.evalIdentifierExpr(p.identifierExpr), p.children))
+      // `InsertIntoStatement.table` and `V2WriteCommand.table` are non-child LogicalPlan slots
+      // (`child = query`), so the standard `resolveOperatorsUp` traversal never visits
+      // placeholders inside them. Materialize them explicitly. Only `InsertIntoStatement` and
+      // `OverwriteByExpression` carry a parse-time placeholder today, but matching the
+      // `V2WriteCommand` trait keeps the rule consistent across the family.
+      case i: InsertIntoStatement if i.table.isInstanceOf[PlanWithUnresolvedIdentifier] =>
+        val p = i.table.asInstanceOf[PlanWithUnresolvedIdentifier]
+        if (p.identifierExpr.resolved && p.childrenResolved) {
+          if (referredTempVars.isDefined) {
+            referredTempVars.get ++= collectTemporaryVariablesInLogicalPlan(p)
+          }
+          i.copy(table = executor.execute(p.planBuilder.apply(
+            IdentifierResolution.evalIdentifierExpr(p.identifierExpr), p.children)))
+        } else {
+          i
+        }
+      case w: V2WriteCommand if w.table.isInstanceOf[PlanWithUnresolvedIdentifier] =>
+        val p = w.table.asInstanceOf[PlanWithUnresolvedIdentifier]
+        if (p.identifierExpr.resolved && p.childrenResolved) {
+          if (referredTempVars.isDefined) {
+            referredTempVars.get ++= collectTemporaryVariablesInLogicalPlan(p)
+          }
+          executor.execute(p.planBuilder.apply(
+            IdentifierResolution.evalIdentifierExpr(p.identifierExpr), p.children)) match {
+            case nr: NamedRelation => w.withNewTable(nr)
+            case other =>
+              throw SparkException.internalError(
+                "PlanWithUnresolvedIdentifier in V2WriteCommand.table must materialize " +
+                  s"into a NamedRelation, but got: ${other.getClass.getName}")
+          }
+        } else {
+          w
+        }
       case other =>
         other.transformExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_IDENTIFIER)) {
           case e: ExpressionWithUnresolvedIdentifier if e.identifierExpr.resolved =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala
index 4b16448641bc1..ab80fc829cf47 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSetVariable.scala
@@ -33,7 +33,8 @@ import org.apache.spark.sql.types.IntegerType
  */
 class ResolveSetVariable(val catalogManager: CatalogManager) extends Rule[LogicalPlan]
   with ColumnResolutionHelper {
-  private val variableResolution = new VariableResolution(catalogManager.tempVariableManager)
+  private val variableResolution =
+    new VariableResolution(catalogManager.tempVariableManager, catalogManager)
 
   /**
    * Checks for duplicate variable names and throws an exception if found.
@@ -61,7 +62,11 @@ class ResolveSetVariable(val catalogManager: CatalogManager) extends Rule[Logica
             nameParts = u.nameParts
           ) match {
             case Some(variable) => variable.copy(canFold = false)
-            case _ => throw unresolvedVariableError(u.nameParts, Seq("SYSTEM", "SESSION"))
+            case _ =>
+              throw unresolvedVariableError(
+                u.nameParts,
+                variableResolution.searchPathEntriesForError,
+                u.origin)
           }
 
         case other => throw SparkException.internalError(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
index f21f53a28300d..8281f89bd2e8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, Attribu
 import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, JoinType, LeftAnti, LeftOuter, RightOuter}
-import org.apache.spark.sql.catalyst.plans.logical.{AppendData, DeleteAction, Filter, HintInfo, InsertAction, Join, JoinHint, LogicalPlan, MergeAction, MergeIntoTable, MergeRows, NO_BROADCAST_AND_REPLICATION, Project, ReplaceData, UpdateAction, WriteDelta}
+import org.apache.spark.sql.catalyst.plans.logical.{DeleteAction, Filter, HintInfo, InsertAction, InsertOnlyMerge, Join, JoinHint, LogicalPlan, MergeAction, MergeIntoTable, MergeRows, NO_BROADCAST_AND_REPLICATION, Project, ReplaceData, UpdateAction, WriteDelta}
 import org.apache.spark.sql.catalyst.plans.logical.MergeRows.{Copy, Delete, Discard, Insert, Instruction, Keep, ROW_ID, Split, Update}
 import org.apache.spark.sql.catalyst.util.RowDeltaUtils.{COPY_OPERATION, INSERT_OPERATION, OPERATION_COLUMN, UPDATE_OPERATION}
 import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations
@@ -73,7 +73,7 @@ object RewriteMergeIntoTable extends RewriteRowLevelCommand with PredicateHelper
           }
           val project = Project(projectList, joinPlan)
 
-          AppendData.byPosition(r, project)
+          InsertOnlyMerge(r, project)
 
         case _ =>
           m
@@ -114,7 +114,7 @@ object RewriteMergeIntoTable extends RewriteRowLevelCommand with PredicateHelper
             output = generateExpandOutput(r.output, outputs),
             joinPlan)
 
-          AppendData.byPosition(r, mergeRows)
+          InsertOnlyMerge(r, mergeRows)
 
         case _ =>
           m
@@ -295,7 +295,12 @@ object RewriteMergeIntoTable extends RewriteRowLevelCommand with PredicateHelper
     // build a plan to write the row delta to the table
     val writeRelation = relation.copy(table = operationTable)
     val projections = buildWriteDeltaProjections(mergeRowsPlan, rowAttrs, rowIdAttrs, metadataAttrs)
-    WriteDelta(writeRelation, cond, mergeRowsPlan, relation, projections)
+    val groupFilterCond = if (notMatchedBySourceActions.isEmpty && groupFilterEnabled) {
+      Some(toGroupFilterCondition(relation, source, cond))
+    } else {
+      None
+    }
+    WriteDelta(writeRelation, cond, mergeRowsPlan, relation, projections, groupFilterCond)
   }
 
   private def chooseWriteDeltaJoinType(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
index 3c41b6bfa5683..f235374bd5d6f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteUpdateTable.scala
@@ -174,7 +174,8 @@ object RewriteUpdateTable extends RewriteRowLevelCommand {
     // build a plan to write the row delta to the table
     val writeRelation = relation.copy(table = operationTable)
     val projections = buildWriteDeltaProjections(rowDeltaPlan, rowAttrs, rowIdAttrs, metadataAttrs)
-    WriteDelta(writeRelation, cond, rowDeltaPlan, relation, projections)
+    val groupFilterCond = if (groupFilterEnabled) Some(cond) else None
+    WriteDelta(writeRelation, cond, rowDeltaPlan, relation, projections, groupFilterCond)
   }
 
   // this method assumes the assignments have been already aligned before
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
index 7eacc5ab9b2ad..aa379a63a2af9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
@@ -41,9 +41,10 @@ object TableOutputResolver extends SQLConfHelper with Logging {
 
   /**
    * Modes for filling in default or null values for missing columns.
-   * If FILL, fill missing top-level columns with their default values.
-   * If RECURSE, fill missing top-level columns and also recurse into nested struct
-   * fields to fill null.
+   * If FILL, fill missing top-level columns with their default values (by-name reorder path).
+   * If RECURSE, fill missing top-level columns (including trailing columns on the by-position
+   * path for INSERT with schema evolution when enabled) and recurse into nested structs,
+   * arrays, and maps to fill missing struct fields with null or defaults.
    * If NONE, do not fill any missing columns.
    */
   object DefaultValueFillMode extends Enumeration {
@@ -92,19 +93,22 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       query: LogicalPlan,
       byName: Boolean,
       conf: SQLConf,
-      supportColDefaultValue: Boolean = false): LogicalPlan = {
+      defaultValueFillMode: DefaultValueFillMode.Value = NONE): LogicalPlan = {
 
     if (expected.size < query.output.size) {
       throw QueryCompilationErrors.cannotWriteTooManyColumnsToTableError(
         tableName, expected.map(_.name), query.output)
     }
 
+    // In RECURSE mode, allow fewer source columns than target by filling trailing columns
+    // with defaults. In other modes, a column count mismatch in by-position resolution is
+    // an error.
+    val fillDefaultValue = defaultValueFillMode == RECURSE
     val errors = new mutable.ArrayBuffer[String]()
     val resolved: Seq[NamedExpression] = if (byName) {
-      // If a top-level column does not have a corresponding value in the input query, fill with
-      // the column's default value. We need to pass `fillDefaultValue` as FILL here, if the
-      // `supportColDefaultValue` parameter is also true.
-      val defaultValueFillMode = if (supportColDefaultValue) FILL else NONE
+      // By-name resolution: the defaultValueFillMode is passed through to control whether
+      // missing top-level columns are filled (FILL/RECURSE) and whether missing nested
+      // struct fields are also filled (RECURSE only).
       reorderColumnsByName(
         tableName,
         query.output,
@@ -112,13 +116,15 @@ object TableOutputResolver extends SQLConfHelper with Logging {
         conf,
         errors += _,
         Nil,
-        defaultValueFillMode)
+        defaultValueFillMode,
+        enforceFullOutput = true)
     } else {
-      if (expected.size > query.output.size) {
+      if (expected.size > query.output.size && !fillDefaultValue) {
         throw QueryCompilationErrors.cannotWriteNotEnoughColumnsToTableError(
           tableName, expected.map(_.name), query.output)
       }
-      resolveColumnsByPosition(tableName, query.output, expected, conf, errors += _)
+      resolveColumnsByPosition(
+        tableName, query.output, expected, conf, errors += _, fillDefaultValue = fillDefaultValue)
     }
 
     if (errors.nonEmpty) {
@@ -157,17 +163,17 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       case (valueType: StructType, colType: StructType) =>
         val resolvedValue = resolveStructType(
           tableName, value, valueType, col, colType,
-          byName = true, conf, addError, colPath, fillChildDefaultValue)
+          byName = true, conf, addError, colPath, fillChildDefaultValue, enforceFullOutput = false)
         resolvedValue.getOrElse(value)
       case (valueType: ArrayType, colType: ArrayType) =>
         val resolvedValue = resolveArrayType(
           tableName, value, valueType, col, colType,
-          byName = true, conf, addError, colPath, fillChildDefaultValue)
+          byName = true, conf, addError, colPath, fillChildDefaultValue, enforceFullOutput = false)
         resolvedValue.getOrElse(value)
       case (valueType: MapType, colType: MapType) =>
         val resolvedValue = resolveMapType(
           tableName, value, valueType, col, colType,
-          byName = true, conf, addError, colPath, fillChildDefaultValue)
+          byName = true, conf, addError, colPath, fillChildDefaultValue, enforceFullOutput = false)
         resolvedValue.getOrElse(value)
       case _ =>
         checkUpdate(tableName, value, col, conf, addError, colPath)
@@ -278,6 +284,32 @@ object TableOutputResolver extends SQLConfHelper with Logging {
     }
   }
 
+  /**
+   * Builds the [[NamedExpression]] for a missing column filled with its default value, applying a
+   * write-side CHAR/VARCHAR length check so that non-foldable defaults (e.g. `current_user()`)
+   * that exceed the column length are caught at runtime. Uses `getRawType` so it works for both
+   * V1 and V2 tables. Shared by the by-name and by-position default-fill paths.
+   *
+   * We unwrap the default's outer alias before the length check so the check wraps the
+   * default value itself, not the alias; `applyColumnMetadata` then re-adds the required
+   * alias and metadata afterward.
+   */
+  private def applyDefaultWithLengthCheck(
+      defaultExpr: Expression,
+      expectedCol: Attribute,
+      conf: SQLConf): NamedExpression = {
+    val rawType = CharVarcharUtils.getRawType(expectedCol.metadata).getOrElse(expectedCol.dataType)
+    val checked = if (!conf.charVarcharAsString && CharVarcharUtils.hasCharVarchar(rawType)) {
+      val value = defaultExpr match {
+        case a: Alias => a.child
+        case other => other
+      }
+      CharVarcharUtils.stringLengthCheck(value, rawType)
+    } else {
+      defaultExpr
+    }
+    applyColumnMetadata(checked, expectedCol)
+  }
 
   private def canWrite(
       tableName: String,
@@ -304,7 +336,8 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       conf: SQLConf,
       addError: String => Unit,
       colPath: Seq[String] = Nil,
-      defaultValueFillMode: DefaultValueFillMode.Value): Seq[NamedExpression] = {
+      defaultValueFillMode: DefaultValueFillMode.Value,
+      enforceFullOutput: Boolean = false): Seq[NamedExpression] = {
     val matchedCols = mutable.HashSet.empty[String]
     val reordered = expectedCols.flatMap { expectedCol =>
       val matched = inputCols.filter(col => conf.resolver(col.name, expectedCol.name))
@@ -320,7 +353,7 @@ object TableOutputResolver extends SQLConfHelper with Logging {
             tableName, newColPath.quoted
           )
         }
-        Some(applyColumnMetadata(defaultExpr.get, expectedCol))
+        Some(applyDefaultWithLengthCheck(defaultExpr.get, expectedCol, conf))
       } else if (matched.length > 1) {
         throw QueryCompilationErrors.incompatibleDataToTableAmbiguousColumnNameError(
           tableName, newColPath.quoted
@@ -336,15 +369,15 @@ object TableOutputResolver extends SQLConfHelper with Logging {
           case (matchedType: StructType, expectedType: StructType) =>
             resolveStructType(
               tableName, matchedCol, matchedType, actualExpectedCol, expectedType,
-              byName = true, conf, addError, newColPath, childFillDefaultValue)
+              byName = true, conf, addError, newColPath, childFillDefaultValue, enforceFullOutput)
           case (matchedType: ArrayType, expectedType: ArrayType) =>
             resolveArrayType(
               tableName, matchedCol, matchedType, actualExpectedCol, expectedType,
-              byName = true, conf, addError, newColPath, childFillDefaultValue)
+              byName = true, conf, addError, newColPath, childFillDefaultValue, enforceFullOutput)
           case (matchedType: MapType, expectedType: MapType) =>
             resolveMapType(
               tableName, matchedCol, matchedType, actualExpectedCol, expectedType,
-              byName = true, conf, addError, newColPath, childFillDefaultValue)
+              byName = true, conf, addError, newColPath, childFillDefaultValue, enforceFullOutput)
           case _ =>
             checkField(
               tableName, actualExpectedCol, matchedCol, byName = true, conf, addError, newColPath)
@@ -366,6 +399,11 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       } else {
         reordered
       }
+    } else if (enforceFullOutput) {
+      val colName =
+        if (colPath.nonEmpty) colPath.quoted
+        else expectedCols.map(_.name).map(toSQLId).mkString(", ")
+      throw QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(tableName, colName)
     } else {
       Nil
     }
@@ -377,7 +415,8 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       expectedCols: Seq[Attribute],
       conf: SQLConf,
       addError: String => Unit,
-      colPath: Seq[String] = Nil): Seq[NamedExpression] = {
+      colPath: Seq[String] = Nil,
+      fillDefaultValue: Boolean = false): Seq[NamedExpression] = {
     val actualExpectedCols = expectedCols.map { attr =>
       attr.withDataType { CharVarcharUtils.getRawType(attr.metadata).getOrElse(attr.dataType) }
     }
@@ -393,7 +432,7 @@ object TableOutputResolver extends SQLConfHelper with Logging {
           tableName, colPath.quoted, extraColsStr
         )
       }
-    } else if (inputCols.size < actualExpectedCols.size) {
+    } else if (inputCols.size < actualExpectedCols.size && !fillDefaultValue) {
       val missingColsStr = actualExpectedCols.takeRight(actualExpectedCols.size - inputCols.size)
         .map(col => toSQLId(col.name))
         .mkString(", ")
@@ -407,25 +446,48 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       }
     }
 
-    inputCols.zip(actualExpectedCols).flatMap { case (inputCol, expectedCol) =>
+    val matched = inputCols.zip(actualExpectedCols).flatMap { case (inputCol, expectedCol) =>
       val newColPath = colPath :+ expectedCol.name
       (inputCol.dataType, expectedCol.dataType) match {
         case (inputType: StructType, expectedType: StructType) =>
           resolveStructType(
             tableName, inputCol, inputType, expectedCol, expectedType,
-            byName = false, conf, addError, newColPath, fillDefaultValue = false)
+            byName = false, conf, addError, newColPath, fillDefaultValue, enforceFullOutput = true)
         case (inputType: ArrayType, expectedType: ArrayType) =>
           resolveArrayType(
             tableName, inputCol, inputType, expectedCol, expectedType,
-            byName = false, conf, addError, newColPath, fillDefaultValue = false)
+            byName = false, conf, addError, newColPath, fillDefaultValue, enforceFullOutput = true)
         case (inputType: MapType, expectedType: MapType) =>
           resolveMapType(
             tableName, inputCol, inputType, expectedCol, expectedType,
-            byName = false, conf, addError, newColPath, fillDefaultValue = false)
+            byName = false, conf, addError, newColPath, fillDefaultValue, enforceFullOutput = true)
         case _ =>
           checkField(tableName, expectedCol, inputCol, byName = false, conf, addError, newColPath)
       }
     }
+
+    val defaults = if (fillDefaultValue) {
+      actualExpectedCols.drop(inputCols.size).map { expectedCol =>
+        val defaultExpr = getDefaultValueExprOrNullLit(
+          expectedCol, conf.useNullsForMissingDefaultColumnValues)
+        if (defaultExpr.isEmpty) {
+          throw QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(
+            tableName, (colPath :+ expectedCol.name).quoted)
+        }
+        applyDefaultWithLengthCheck(defaultExpr.get, expectedCol, conf)
+      }
+    } else {
+      Nil
+    }
+
+    val result = matched ++ defaults
+    if (result.length != actualExpectedCols.size) {
+      val colName =
+        if (colPath.nonEmpty) colPath.quoted
+        else actualExpectedCols.map(_.name).map(toSQLId).mkString(", ")
+      throw QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(tableName, colName)
+    }
+    result
   }
 
   private[sql] def checkNullability(
@@ -447,6 +509,7 @@ object TableOutputResolver extends SQLConfHelper with Logging {
     input.nullable && !attr.nullable && conf.storeAssignmentPolicy != StoreAssignmentPolicy.LEGACY
   }
 
+  // scalastyle:off argcount
   private def resolveStructType(
       tableName: String,
       input: Expression,
@@ -457,7 +520,8 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       conf: SQLConf,
       addError: String => Unit,
       colPath: Seq[String],
-      fillDefaultValue: Boolean): Option[NamedExpression] = {
+      fillDefaultValue: Boolean,
+      enforceFullOutput: Boolean): Option[NamedExpression] = {
     val nullCheckedInput = checkNullability(input, expected, conf, colPath)
     val fields = inputType.zipWithIndex.map { case (f, i) =>
       Alias(GetStructField(nullCheckedInput, i, Some(f.name)), f.name)()
@@ -465,10 +529,10 @@ object TableOutputResolver extends SQLConfHelper with Logging {
     val defaultValueMode = if (fillDefaultValue) RECURSE else NONE
     val resolved = if (byName) {
       reorderColumnsByName(tableName, fields, toAttributes(expectedType), conf, addError, colPath,
-        defaultValueMode)
+        defaultValueMode, enforceFullOutput)
     } else {
       resolveColumnsByPosition(
-        tableName, fields, toAttributes(expectedType), conf, addError, colPath)
+        tableName, fields, toAttributes(expectedType), conf, addError, colPath, fillDefaultValue)
     }
     if (resolved.length == expectedType.length) {
       val struct = CreateStruct(resolved)
@@ -478,6 +542,11 @@ object TableOutputResolver extends SQLConfHelper with Logging {
         struct
       }
       Some(applyColumnMetadata(res, expected))
+    } else if (enforceFullOutput) {
+      val colName =
+        if (colPath.nonEmpty) colPath.quoted
+        else expectedType.fields.map(_.name).map(toSQLId).mkString(", ")
+      throw QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(tableName, colName)
     } else {
       None
     }
@@ -493,7 +562,8 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       conf: SQLConf,
       addError: String => Unit,
       colPath: Seq[String],
-      fillDefaultValue: Boolean): Option[NamedExpression] = {
+      fillDefaultValue: Boolean,
+      enforceFullOutput: Boolean): Option[NamedExpression] = {
     val nullCheckedInput = checkNullability(input, expected, conf, colPath)
     val param = NamedLambdaVariable("element", inputType.elementType, inputType.containsNull)
     val fakeAttr =
@@ -501,9 +571,10 @@ object TableOutputResolver extends SQLConfHelper with Logging {
     val res = if (byName) {
       val defaultValueMode = if (fillDefaultValue) RECURSE else NONE
       reorderColumnsByName(tableName, Seq(param), Seq(fakeAttr), conf, addError, colPath,
-        defaultValueMode)
+        defaultValueMode, enforceFullOutput)
     } else {
-      resolveColumnsByPosition(tableName, Seq(param), Seq(fakeAttr), conf, addError, colPath)
+      resolveColumnsByPosition(
+        tableName, Seq(param), Seq(fakeAttr), conf, addError, colPath, fillDefaultValue)
     }
     if (res.length == 1) {
       val castedArray =
@@ -515,6 +586,9 @@ object TableOutputResolver extends SQLConfHelper with Logging {
           ArrayTransform(nullCheckedInput, func)
         }
       Some(applyColumnMetadata(castedArray, expected))
+    } else if (enforceFullOutput) {
+      val colName = if (colPath.nonEmpty) colPath.quoted else toSQLId(expected.name)
+      throw QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(tableName, colName)
     } else {
       None
     }
@@ -530,7 +604,8 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       conf: SQLConf,
       addError: String => Unit,
       colPath: Seq[String],
-      fillDefaultValue: Boolean): Option[NamedExpression] = {
+      fillDefaultValue: Boolean,
+      enforceFullOutput: Boolean): Option[NamedExpression] = {
     val nullCheckedInput = checkNullability(input, expected, conf, colPath)
 
     val keyParam = NamedLambdaVariable("key", inputType.keyType, nullable = false)
@@ -538,9 +613,10 @@ object TableOutputResolver extends SQLConfHelper with Logging {
     val defaultValueFillMode = if (fillDefaultValue) RECURSE else NONE
     val resKey = if (byName) {
       reorderColumnsByName(tableName, Seq(keyParam), Seq(fakeKeyAttr), conf, addError, colPath,
-        defaultValueFillMode)
+        defaultValueFillMode, enforceFullOutput)
     } else {
-      resolveColumnsByPosition(tableName, Seq(keyParam), Seq(fakeKeyAttr), conf, addError, colPath)
+      resolveColumnsByPosition(
+        tableName, Seq(keyParam), Seq(fakeKeyAttr), conf, addError, colPath, fillDefaultValue)
     }
 
     val valueParam =
@@ -549,10 +625,10 @@ object TableOutputResolver extends SQLConfHelper with Logging {
       AttributeReference("value", expectedType.valueType, expectedType.valueContainsNull)()
     val resValue = if (byName) {
       reorderColumnsByName(tableName, Seq(valueParam), Seq(fakeValueAttr), conf, addError, colPath,
-        defaultValueFillMode)
+        defaultValueFillMode, enforceFullOutput)
     } else {
       resolveColumnsByPosition(
-        tableName, Seq(valueParam), Seq(fakeValueAttr), conf, addError, colPath)
+        tableName, Seq(valueParam), Seq(fakeValueAttr), conf, addError, colPath, fillDefaultValue)
     }
 
     if (resKey.length == 1 && resValue.length == 1) {
@@ -577,10 +653,14 @@ object TableOutputResolver extends SQLConfHelper with Logging {
           MapFromArrays(newKeys, newValues)
         }
       Some(applyColumnMetadata(casted, expected))
+    } else if (enforceFullOutput) {
+      val colName = if (colPath.nonEmpty) colPath.quoted else toSQLId(expected.name)
+      throw QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(tableName, colName)
     } else {
       None
     }
   }
+  // scalastyle:on argcount
 
   // For table insertions, capture the overflow errors and show proper message.
   // Without this method, the overflow errors of castings will show hints for turning off ANSI SQL
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index ce387ef397aca..53de166e69edf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -228,6 +228,7 @@ object TypeCoercion extends TypeCoercionBase {
       case (_: StringType, target: NumericType) => target
       case (_: StringType, datetime: DatetimeType) => datetime
       case (_: StringType, AnyTimestampType) => AnyTimestampType.defaultConcreteType
+      case (_: StringType, AnyTimeType) => AnyTimeType.defaultConcreteType
       case (_: StringType, BinaryType) => BinaryType
       // Cast any atomic type to string except if there are strings with different collations.
       case (any: AtomicType, st: StringType) if !any.isInstanceOf[StringType] => st
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnresolveRelationsInTransaction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnresolveRelationsInTransaction.scala
new file mode 100644
index 0000000000000..b0497897bbac2
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnresolveRelationsInTransaction.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, TransactionalWrite}
+import org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.allowInvokingTransformsInAnalyzer
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog}
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+
+/**
+ * When a transaction is active, converts resolved [[DataSourceV2Relation]] nodes back to
+ * [[V2TableReference]] placeholders for all relations loaded by a catalog with the same
+ * name as the transaction catalog.
+ *
+ * This forces re-resolution of those relations against the transaction's catalog, which
+ * intercepts [[TableCatalog#loadTable]] calls to track which tables are read as part of
+ * the transaction.
+ */
+class UnresolveRelationsInTransaction(val catalogManager: CatalogManager)
+  extends Rule[LogicalPlan] with LookupCatalog {
+
+  override def apply(plan: LogicalPlan): LogicalPlan =
+    catalogManager.transaction match {
+      case Some(transaction) =>
+        // We use plain transform rather than resolveOperators* because the latter skips subtrees
+        // that have already been analyzed. Furthermore, allowInvokingTransformsInAnalyzer
+        // allows to suppress the assertNotAnalysisRule safety check, which forbids calling
+        // transform directly inside the analyzer when not within a resolveOperators call.
+        allowInvokingTransformsInAnalyzer {
+          plan.transform {
+            case tw: TransactionalWrite =>
+              unresolveRelations(tw, transaction.catalog)
+          }
+        }
+      case _ => plan
+    }
+
+  private def unresolveRelations(
+      plan: LogicalPlan,
+      catalog: CatalogPlugin): LogicalPlan = {
+    // Walk subqueries too: relations from the transaction's catalog that hide inside scalar /
+    // IN / EXISTS subqueries must also be re-resolved through the txn-aware catalog so the
+    // connector can track every read in the transaction's scope. We use `transformWithSubqueries`
+    // rather than `resolveOperators*` so that subtrees marked `analyzed = true` (e.g. from
+    // cached/pre-analyzed DataFrames or temp views) are still rewritten.
+    plan.transformWithSubqueries {
+      case r: DataSourceV2Relation if isLoadedFromCatalog(r, catalog) =>
+        V2TableReference.createForTransaction(r)
+    }
+  }
+
+  private def isLoadedFromCatalog(
+      relation: DataSourceV2Relation,
+      catalog: CatalogPlugin): Boolean = {
+    relation.catalog.exists(_.name == catalog.name) && relation.identifier.isDefined
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
index f9fb2936a9145..83ad97fdc4faf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -23,7 +23,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.LogKeys.{ANALYSIS_ERROR, QUERY_PLAN}
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.ExtendedAnalysisException
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, CurrentDate, CurrentTimestampLike, Expression, GroupingSets, LocalTimestamp, MonotonicallyIncreasingID, SessionWindow, WindowExpression}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, CurrentDate, CurrentTimestampLike, Expression, GroupingSets, LocalTimestamp, MonotonicallyIncreasingID, NamedExpression, SessionWindow, WindowExpression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
@@ -280,6 +280,40 @@ object UnsupportedOperationChecker extends Logging {
       case _ =>
     }
 
+    // The streaming Change Data Capture (CDC) post-processing rewrites in
+    // [[ResolveChangelogTable]] are designed and validated only for Append output mode.
+    // Two markers can identify a CDC-rewritten plan:
+    //   - The row-level rewrite (`addStreamingRowLevelPostProcessing`) injects a
+    //     streaming Aggregate whose `__spark_cdc_events` alias's output attribute
+    //     carries the metadata marker
+    //     `ResolveChangelogTable.streamingPostProcessingMarker` (mirrors
+    //     `SessionWindow.marker` / `EventTimeWatermark.delayKey`).
+    //   - The netChanges rewrite (`addStreamingNetChangeComputation`) injects a
+    //     `TransformWithState` driven by `CdcNetChangesStatefulProcessor`.
+    // Under Update or Complete the Aggregate / TransformWithState would re-emit
+    // per-batch state changes or the full result table per batch, neither of which
+    // matches batch CDC semantics. (Complete mode without any streaming Aggregate is
+    // already rejected by the generic `aggregates.isEmpty` check above, so the
+    // netChanges-only marker is needed here primarily to catch Update mode.) Reject
+    // those modes at analysis time with a clear error rather than silently producing
+    // a misleading change feed.
+    val containsCdcRowLevelRewrite = aggregates.exists(a => a.aggregateExpressions.exists {
+      case ne: NamedExpression if ne.resolved =>
+        ne.metadata.contains(ResolveChangelogTable.streamingPostProcessingMarker) &&
+          ne.metadata.getBoolean(ResolveChangelogTable.streamingPostProcessingMarker)
+      case _ => false
+    })
+    val containsCdcNetChangesProcessor = plan.exists {
+      case t: TransformWithState if t.isStreaming &&
+        t.statefulProcessor.isInstanceOf[CdcNetChangesStatefulProcessor] => true
+      case _ => false
+    }
+    if (outputMode != InternalOutputModes.Append &&
+        (containsCdcRowLevelRewrite || containsCdcNetChangesProcessor)) {
+      throw QueryCompilationErrors.unsupportedOutputModeForStreamingOperationError(
+        outputMode, "Change Data Capture (CDC) streaming reads with post-processing")
+    }
+
     /**
      * Whether the subplan will contain complete data or incremental data in every incremental
      * execution. Some operations may be allowed only when the child logical plan gives complete
@@ -545,7 +579,7 @@ object UnsupportedOperationChecker extends Logging {
           throwError("Sorting is not supported on streaming DataFrames/Datasets, unless it is on " +
             "aggregated DataFrame/Dataset in Complete output mode")
 
-        case Sample(_, _, _, _, child) if child.isStreaming =>
+        case Sample(_, _, _, _, child, _) if child.isStreaming =>
           throwError("Sampling is not supported on streaming DataFrames/Datasets")
 
         case Window(windowExpression, _, _, child, _) if child.isStreaming =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/V2TableReference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/V2TableReference.scala
index 85c36d452b309..223e7012af6b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/V2TableReference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/V2TableReference.scala
@@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.SQLConfHelper
 import org.apache.spark.sql.catalyst.analysis.V2TableReference.Context
 import org.apache.spark.sql.catalyst.analysis.V2TableReference.TableInfo
 import org.apache.spark.sql.catalyst.analysis.V2TableReference.TemporaryViewContext
+import org.apache.spark.sql.catalyst.analysis.V2TableReference.TransactionContext
+import org.apache.spark.sql.catalyst.analysis.V2TableReference.WriteTargetContext
 import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.catalyst.plans.logical.LeafNode
 import org.apache.spark.sql.catalyst.plans.logical.Statistics
@@ -37,7 +39,7 @@ import org.apache.spark.sql.connector.catalog.V2TableUtil
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
-import org.apache.spark.sql.util.SchemaValidationMode.ALLOW_NEW_TOP_LEVEL_FIELDS
+import org.apache.spark.sql.util.SchemaValidationMode.{ALLOW_NEW_TOP_LEVEL_FIELDS, PROHIBIT_CHANGES}
 import org.apache.spark.util.ArrayImplicits._
 
 /**
@@ -79,22 +81,50 @@ private[sql] case class V2TableReference private(
 private[sql] object V2TableReference {
 
   case class TableInfo(
+      tableId: Option[String],
       columns: Seq[Column],
       metadataColumns: Seq[MetadataColumn])
 
-  sealed trait Context
-  case class TemporaryViewContext(viewName: Seq[String]) extends Context
+  sealed trait Context {
+    def cacheable: Boolean
+  }
+
+  /** Context for relations that are re-resolved on access of a dataframe temp view. */
+  case class TemporaryViewContext(viewName: Seq[String]) extends Context {
+    val cacheable = true
+  }
+
+  /** Context for relations that are re-resolved through a transaction catalog. */
+  case object TransactionContext extends Context {
+    val cacheable = true
+  }
+
+  /** Context for write targets. */
+  case object WriteTargetContext extends Context {
+    val cacheable = false
+  }
 
   def createForTempView(relation: DataSourceV2Relation, viewName: Seq[String]): V2TableReference = {
     create(relation, TemporaryViewContext(viewName))
   }
 
+  // V2TableReference nodes in the transaction context are produced by
+  // UnresolveRelationsInTransaction which unresolves already resolved relations.
+  def createForTransaction(relation: DataSourceV2Relation): V2TableReference = {
+    create(relation, TransactionContext)
+  }
+
+  def createForWriteTarget(relation: DataSourceV2Relation): V2TableReference = {
+    create(relation, WriteTargetContext)
+  }
+
   private def create(relation: DataSourceV2Relation, context: Context): V2TableReference = {
     val ref = V2TableReference(
       relation.catalog.get.asTableCatalog,
       relation.identifier.get,
       relation.options,
       TableInfo(
+        tableId = Option(relation.table.id),
         columns = relation.table.columns.toImmutableArraySeq,
         metadataColumns = V2TableUtil.extractMetadataColumns(relation)),
       relation.output,
@@ -110,11 +140,44 @@ private[sql] object V2TableReferenceUtils extends SQLConfHelper {
     ref.context match {
       case ctx: TemporaryViewContext =>
         validateLoadedTableInTempView(table, ref, ctx)
+      case TransactionContext | WriteTargetContext =>
+        validateNoChanges(table, ref)
       case ctx =>
         throw SparkException.internalError(s"Unknown table ref context: ${ctx.getClass.getName}")
     }
   }
 
+  private def validateNoChanges(table: Table, ref: V2TableReference): Unit = {
+    // Make sure the table was not dropped and recreated.
+    ref.info.tableId.foreach(V2TableUtil.validateTableId(ref.name, _, table))
+
+    // Detect columns that were dropped and re-added with the same name but a different
+    // column ID. This catches replacements that preserve the schema but change identity.
+    val colIdErrors = V2TableUtil.validateColumnIds(
+      table = table,
+      originalCapturedCols = ref.info.columns)
+    if (colIdErrors.nonEmpty) {
+      throw QueryCompilationErrors.columnIdMismatchAfterAnalysis(ref.name, colIdErrors)
+    }
+
+    // Do not allow schema evolution to pre-analysed dataframes that are later used in
+    // transactional writes. This is because the entire plans was built based on the original schema
+    // and any schema change would make the plan structurally invalid. This is inline with the
+    // semantics of SPARK-54444.
+    val dataErrors = V2TableUtil.validateCapturedColumns(
+      table = table,
+      originCols = ref.info.columns,
+      mode = PROHIBIT_CHANGES)
+    if (dataErrors.nonEmpty) {
+      throw QueryCompilationErrors.columnsMissingOrAddedAfterAnalysis(ref.name, dataErrors)
+    }
+
+    val metaErrors = V2TableUtil.validateCapturedMetadataColumns(table, ref.info.metadataColumns)
+    if (metaErrors.nonEmpty) {
+      throw QueryCompilationErrors.metadataColumnsChangedAfterAnalysis(ref.name, metaErrors)
+    }
+  }
+
   private def validateLoadedTableInTempView(
       table: Table,
       ref: V2TableReference,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala
index 0095885c0135d..bc85ccfee34c1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/VariableResolution.scala
@@ -33,7 +33,36 @@ import org.apache.spark.sql.connector.catalog.{
   Identifier
 }
 
-class VariableResolution(tempVariableManager: TempVariableManager) extends SQLConfHelper {
+class VariableResolution(
+    tempVariableManager: TempVariableManager,
+    catalogManager: CatalogManager)
+    extends SQLConfHelper {
+
+  /**
+   * Unqualified session variables resolve only when SYSTEM.SESSION is on the SQL path
+   * (PATH enabled and explicitly set).
+   */
+  private def allowUnqualifiedSessionTempVariableLookup(nameParts: Seq[String]): Boolean = {
+    nameParts.length != 1 || catalogManager.isSystemSessionOnPath
+  }
+
+  /**
+   * Search-path entries to report in `UNRESOLVED_VARIABLE` for DML lookups (`SET VAR`,
+   * `FETCH ... INTO`). The full SQL path is reported regardless of how the name was
+   * qualified, matching the convention used by `TABLE_OR_VIEW_NOT_FOUND` and
+   * `UNRESOLVED_ROUTINE`. Keeping the rendering qualification-independent also avoids
+   * re-shaping the error if Spark ever grows struct-field assignment, where 2-part forms
+   * become genuinely ambiguous.
+   *
+   * DDL paths (`DECLARE` / `DROP` name validation in
+   * [[org.apache.spark.sql.catalyst.analysis.ResolveCatalogs]]) do not consult the SQL path
+   * and report `[system.session]` directly at their throw site.
+   */
+  def searchPathEntriesForError: Seq[Seq[String]] = {
+    catalogManager.resolutionPathEntriesForAnalysis(
+      AnalysisContext.get.resolutionPathEntries,
+      AnalysisContext.get.catalogAndNamespace)
+  }
 
   /**
    * Resolves a `multipartName` to an [[Expression]] tree, supporting nested field access.
@@ -125,7 +154,8 @@ class VariableResolution(tempVariableManager: TempVariableManager) extends SQLCo
         )
       }
       .orElse(
-        if (maybeTempVariableName(nameParts)) {
+        if (maybeTempVariableName(nameParts) &&
+            allowUnqualifiedSessionTempVariableLookup(nameParts)) {
           tempVariableManager
             .get(namePartsCaseAdjusted)
             .map { varDef =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala
index faa3b9081cbfd..b0f0ef3b092c1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala
@@ -38,7 +38,7 @@ object ViewResolution {
       val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth
       if (nestedViewDepth > maxNestedViewDepth) {
         throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError(
-          view.desc.identifier,
+          view.desc.fullIdent,
           maxNestedViewDepth,
           view
         )
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
index bf9acb775ce10..31c835986e20d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, SubqueryExpression, Unevaluable}
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SupervisingCommand}
+import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan, SupervisingCommand, V2WriteCommand}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMAND, PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_WITH}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase}
@@ -179,9 +179,30 @@ object BindParameters extends Rule[LogicalPlan] with QueryErrorsBase {
     p0.resolveOperatorsDownWithPruning(_.containsPattern(PARAMETER) && !stop) {
       case p1 =>
         stop = p1.isInstanceOf[ParameterizedQuery]
-        p1.transformExpressionsWithPruning(_.containsPattern(PARAMETER)) (f orElse {
-          case sub: SubqueryExpression => sub.withNewPlan(bind(sub.plan)(f))
-        })
+        // `InsertIntoStatement.table` and `V2WriteCommand.table` are non-child LogicalPlan
+        // slots, so the standard `resolveOperatorsDown` traversal never visits parameter
+        // markers inside them. Recurse explicitly so `INSERT ... IDENTIFIER(:p)` and
+        // `INSERT INTO IDENTIFIER(:p) REPLACE WHERE ...` resolve under the legacy
+        // parameter-substitution mode (SPARK-46625). Today only the `OverwriteByExpression`
+        // variant of `V2WriteCommand` is parser-built with a placeholder in `table`; the trait
+        // match keeps the rule consistent for any future analyzer-built node in the same shape.
+        val withBoundTable = p1 match {
+          case i: InsertIntoStatement if i.table.containsPattern(PARAMETER) =>
+            i.copy(table = bind(i.table)(f))
+          case w: V2WriteCommand if w.table.containsPattern(PARAMETER) =>
+            bind(w.table)(f) match {
+              case nr: NamedRelation => w.withNewTable(nr)
+              case other =>
+                throw SparkException.internalError(
+                  "Parameter binding on V2WriteCommand.table must preserve " +
+                    s"NamedRelation, but got: ${other.getClass.getName}")
+            }
+          case other => other
+        }
+        withBoundTable.transformExpressionsWithPruning(_.containsPattern(PARAMETER)) (
+          f orElse {
+            case sub: SubqueryExpression => sub.withNewPlan(bind(sub.plan)(f))
+          })
     }
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala
index 1a8658bb764d5..dd70963a79841 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala
@@ -60,7 +60,7 @@ import org.apache.spark.sql.catalyst.util.CollationFactory
  */
 class FunctionResolver(
     expressionResolver: ExpressionResolver,
-    functionResolution: FunctionResolution,
+    protected val functionResolution: FunctionResolution,
     aggregateExpressionResolver: AggregateExpressionResolver,
     binaryArithmeticResolver: BinaryArithmeticResolver)
     extends TreeNodeResolver[UnresolvedFunction, Expression]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala
index 503c94fc9cdf6..3c5a3f1832e8d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolverUtils.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.analysis.resolver
 
 import java.util.Locale
 
+import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.{
+  FunctionResolution,
   ResolvedStar,
   Star,
   UnresolvedFunction,
@@ -35,6 +37,7 @@ import org.apache.spark.sql.internal.SQLConf
  */
 trait FunctionResolverUtils {
   protected def expressionResolver: ExpressionResolver
+  protected def functionResolution: FunctionResolution
   protected def conf: SQLConf
 
   private val scopes = expressionResolver.getNameScopes
@@ -99,7 +102,21 @@ trait FunctionResolverUtils {
       unresolvedFunction: UnresolvedFunction,
       normalizeFunctionName: Boolean = true
   ): Boolean = {
-    !unresolvedFunction.isDistinct && isCount(unresolvedFunction, normalizeFunctionName)
+    !unresolvedFunction.isDistinct &&
+      isCount(unresolvedFunction, normalizeFunctionName) &&
+      !isUnqualifiedCountShadowedByTemp(unresolvedFunction)
+  }
+
+  /**
+   * Keep single-pass behavior aligned with fixed-point: when PATH puts system.session before
+   * system.builtin and a temp `count` exists, unqualified `count(*)` must not be rewritten to
+   * `count(1)`.
+   */
+  private def isUnqualifiedCountShadowedByTemp(unresolvedFunction: UnresolvedFunction): Boolean = {
+    unresolvedFunction.nameParts.length == 1 &&
+      functionResolution.isSessionBeforeBuiltinInPath &&
+      functionResolution.catalogManager.v1SessionCatalog
+        .isTemporaryFunction(FunctionIdentifier(unresolvedFunction.nameParts.head))
   }
 
   private def isCount(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala
index 6b90a5c05baf1..676ef381f2f17 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HigherOrderFunctionResolver.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.errors.QueryCompilationErrors
  */
 class HigherOrderFunctionResolver(
     protected val expressionResolver: ExpressionResolver,
-    functionResolution: FunctionResolution)
+    protected val functionResolution: FunctionResolution)
     extends TreeNodeResolver[UnresolvedFunction, Expression]
     with ProducesUnresolvedSubtree
     with CoercesExpressionTypes
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala
index 1c9a296af9ba4..2c39fe71e62ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala
@@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.{
 }
 import org.apache.spark.sql.catalyst.plans.logical.Aggregate
 import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.Metadata
 
@@ -974,13 +975,14 @@ class NameScope(
  */
 class NameScopeStack(
     tempVariableManager: TempVariableManager,
+    catalogManager: CatalogManager,
     subqueryRegistry: SubqueryRegistry,
     planLogger: PlanLogger = new PlanLogger)
     extends SQLConfHelper {
   private val stack = new ArrayDeque[NameScope]
   stack.push(new NameScope(planLogger = planLogger))
 
-  private val variableResolution = new VariableResolution(tempVariableManager)
+  private val variableResolution = new VariableResolution(tempVariableManager, catalogManager)
 
   /**
    * Get the current scope, which is a default choice for name resolution.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/Resolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/Resolver.scala
index 35d752779d6c6..aaf7117ef4e8b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/Resolver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/Resolver.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.QueryPlanningTracker
 import org.apache.spark.sql.catalyst.analysis.{
   withPosition,
+  AnalysisContext,
   AnalysisErrorAt,
   CleanupAliases,
   FunctionResolution,
@@ -53,8 +54,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 import org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
 import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.errors.QueryErrorsBase
+import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase}
 import org.apache.spark.sql.internal.SQLConf
 
 /**
@@ -96,6 +96,7 @@ class Resolver(
   private val subqueryRegistry = new SubqueryRegistry
   private val scopes = new NameScopeStack(
     tempVariableManager = catalogManager.tempVariableManager,
+    catalogManager = catalogManager,
     subqueryRegistry = subqueryRegistry,
     planLogger = planLogger
   )
@@ -583,9 +584,14 @@ class Resolver(
           relationsWithResolvedMetadata
         case None =>
           val multipartId = unresolvedRelation.multipartIdentifier
-          val catalogPath = (catalogManager.currentCatalog.name() +:
-            catalogManager.currentNamespace).toSeq
-          val searchPath = SQLConf.get.resolutionSearchPath(catalogPath).map(toSQLId)
+          val catalogPath = {
+            val ctx = AnalysisContext.get.catalogAndNamespace
+            if (ctx.nonEmpty) ctx
+            else (catalogManager.currentCatalog.name() +: catalogManager.currentNamespace).toSeq
+          }
+          val searchPath = catalogManager
+            .sqlResolutionPathEntries(catalogPath.head, catalogPath.tail.toSeq)
+            .map(toSQLId)
           unresolvedRelation.tableNotFound(multipartId, searchPath)
       }
 
@@ -842,7 +848,7 @@ class Resolver(
         messageParameters = Map(
           "missingAttributes" -> makeCommaSeparatedExpressionString(missingInput.toSeq),
           "input" -> makeCommaSeparatedExpressionString(inputSet.toSeq),
-          "operator" -> operator.simpleString(conf.maxToStringFields),
+          "operator" -> operator.simpleString(SQLConf.get.maxToStringFields),
           "operation" -> makeCommaSeparatedExpressionString(attributesWithSameName.toSeq)
         )
       )
@@ -852,7 +858,7 @@ class Resolver(
         messageParameters = Map(
           "missingAttributes" -> makeCommaSeparatedExpressionString(missingInput.toSeq),
           "input" -> makeCommaSeparatedExpressionString(inputSet.toSeq),
-          "operator" -> operator.simpleString(conf.maxToStringFields)
+          "operator" -> operator.simpleString(SQLConf.get.maxToStringFields)
         )
       )
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala
index e2171b84b6eb2..ed41c320f8c9b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala
@@ -474,9 +474,8 @@ class ResolverGuard(
   private def checkUnresolvedFunction(unresolvedFunction: UnresolvedFunction) = {
     val nameParts = unresolvedFunction.nameParts
     val funcName = nameParts.last.toLowerCase(Locale.ROOT)
-
-    if (nameParts.length == 1) {
-      // Unqualified: same as master (unsupported, non-builtin, or check children)
+    if (nameParts.size == 1) {
+      // Unqualified: reject if unsupported, else non-builtin or check children (same as master)
       if (isUnsupportedFunction(funcName)) {
         Some(s"unsupported function ${funcName}")
       } else if (!isBuiltinFunction(funcName)) {
@@ -493,7 +492,7 @@ class ResolverGuard(
         unresolvedFunction.children.collectFirst { case CheckExpression(reason) => reason }
       }
     } else if (FunctionResolution.sessionNamespaceKind(nameParts).isDefined) {
-      // Session-qualified: allow through (system-first behavior)
+      // Session-qualified: allow through (PATH + system-first)
       unresolvedFunction.children.collectFirst { case CheckExpression(reason) => reason }
     } else {
       Some("multi-part function name")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala
index 992f065ef3aa2..a224e521b548b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala
@@ -193,7 +193,7 @@ case class ViewResolutionContext(
   def validate(unresolvedView: View): Unit = {
     if (nestedViewDepth > maxNestedViewDepth) {
       throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError(
-        unresolvedView.desc.identifier,
+        unresolvedView.desc.fullIdent,
         maxNestedViewDepth,
         unresolvedView
       )
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index bac5651265d94..a5b467d0f0816 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -59,12 +59,22 @@ trait UnresolvedUnaryNode extends UnaryNode with UnresolvedNode
 /**
  * A logical plan placeholder that holds the identifier clause string expression. It will be
  * replaced by the actual logical plan with the evaluated identifier string.
+ *
+ * Extends `NamedRelation` so it can occupy a `NamedRelation`-typed slot (e.g.
+ * `OverwriteByExpression.table`) directly at parse time, instead of wrapping the whole command.
+ *
+ * The parser always places this node inside the command's identifier slot (a child slot for
+ * DELETE/UPDATE/MERGE/CTAS/RTAS, or a non-child slot for `InsertIntoStatement.table` and
+ * `OverwriteByExpression.table` -- handled via explicit cases in `ResolveIdentifierClause` and
+ * `BindParameters`). It is never the substitution root of a `WITH ... <command>` subtree, so
+ * `CTEInChildren` semantics are not needed: any surrounding `WithCTE` produced by
+ * `CTESubstitution` targets the inner command directly.
  */
 case class PlanWithUnresolvedIdentifier(
     identifierExpr: Expression,
     children: Seq[LogicalPlan],
     planBuilder: (Seq[String], Seq[LogicalPlan]) => LogicalPlan)
-  extends UnresolvedNode {
+  extends UnresolvedNode with NamedRelation {
 
   def this(identifierExpr: Expression, planBuilder: Seq[String] => LogicalPlan) = {
     this(identifierExpr, Nil, (ident, _) => planBuilder(ident))
@@ -72,6 +82,12 @@ case class PlanWithUnresolvedIdentifier(
 
   final override val nodePatterns: Seq[TreePattern] = Seq(PLAN_WITH_UNRESOLVED_IDENTIFIER)
 
+  // Placeholder name used by error paths that render `NamedRelation.name` for an unresolved
+  // table reference -- e.g. `SparkStrategies.extractTableNameForError` and the `r: NamedRelation`
+  // fallback in `QueryCompilationErrors`. Renders as the SQL text of the identifier expression
+  // (e.g. `IDENTIFIER(:p)` or `concat('a', 'b')`) so error messages remain informative.
+  override def name: String = identifierExpr.sql
+
   override protected def withNewChildrenInternal(
       newChildren: IndexedSeq[LogicalPlan]): LogicalPlan =
     copy(identifierExpr, newChildren, planBuilder)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
index fd05b7cd5a2a7..a8f5f0688890c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
@@ -68,15 +68,37 @@ case class UnresolvedView(
     allowTemp: Boolean,
     suggestAlternative: Boolean = false) extends UnresolvedLeafNode
 
+/**
+ * Controls which search path is shown in `TABLE_OR_VIEW_NOT_FOUND` for
+ * [[UnresolvedTableOrView]] (see [[org.apache.spark.sql.catalyst.analysis.CheckAnalysis]]).
+ */
+sealed trait UnresolvedTableOrViewSearchPathMode
+
+object UnresolvedTableOrViewSearchPathMode {
+  /** DDL on catalog objects: `system.session` and current catalog namespace only. */
+  case object Ddl extends UnresolvedTableOrViewSearchPathMode
+  /**
+   * Like `SELECT` / DML: full `sqlResolutionPathEntries` order; fully qualified
+   * `system.session.*` names still use the session-only path in errors.
+   */
+  case object QueryLike extends UnresolvedTableOrViewSearchPathMode
+}
+
 /**
  * Holds the name of a table or view that has yet to be looked up in a catalog. It will
  * be resolved to [[ResolvedTable]], [[ResolvedPersistentView]] or [[ResolvedTempView]] during
  * analysis.
+ *
+ * @param tableNotFoundSearchPathMode how to format `searchPath` in `TABLE_OR_VIEW_NOT_FOUND`;
+ *                                    set explicitly at parse / construction time (not inferred
+ *                                    from [[commandName]]).
  */
 case class UnresolvedTableOrView(
     multipartIdentifier: Seq[String],
     commandName: String,
-    allowTempView: Boolean) extends UnresolvedLeafNode
+    allowTempView: Boolean,
+    tableNotFoundSearchPathMode: UnresolvedTableOrViewSearchPathMode =
+      UnresolvedTableOrViewSearchPathMode.Ddl) extends UnresolvedLeafNode
 
 sealed trait PartitionSpec extends LeafExpression with Unevaluable {
   override def dataType: DataType = throw SparkException.internalError(
@@ -205,15 +227,37 @@ case class ResolvedProcedure(
 }
 
 /**
- * A plan containing resolved persistent views.
+ * A plan containing a resolved persistent view.
+ *
+ * `info` is the typed v2 [[org.apache.spark.sql.connector.catalog.ViewInfo]] payload for the
+ * view. Session-catalog (v1) views are surfaced through the same channel via
+ * [[org.apache.spark.sql.connector.catalog.V1ViewInfo]], which extends `ViewInfo` and wraps
+ * the original [[CatalogTable]] -- mirroring the way
+ * [[org.apache.spark.sql.connector.catalog.V1Table]] exposes a v1 `CatalogTable` through the
+ * v2 [[org.apache.spark.sql.connector.catalog.Table]] surface for `ResolvedTable`. v1-only
+ * paths (e.g. `DescribeTableCommand`, `ShowCreateTableCommand`) recover the original
+ * `CatalogTable` by pattern-matching `info` against `V1ViewInfo`.
  */
-// TODO: create a generic representation for views, after we add view support to v2 catalog. For now
-//       we only hold the view schema.
 case class ResolvedPersistentView(
     catalog: CatalogPlugin,
     identifier: Identifier,
-    metadata: CatalogTable) extends LeafNodeWithoutStats {
-  override def output: Seq[Attribute] = Nil
+    info: org.apache.spark.sql.connector.catalog.ViewInfo)
+  extends LeafNodeWithoutStats {
+  // Surface the view's schema as `output` so `ResolveReferences` can resolve column references
+  // against it (e.g. `DescribeColumn(ResolvedPersistentView, UnresolvedAttribute, ...)`). The
+  // schema is otherwise unused -- consumers read `info` directly and don't iterate `output`.
+  // SELECT on a view goes through view-text expansion and never produces this node, so giving
+  // it output does not affect query resolution.
+  override lazy val output: Seq[Attribute] =
+    toAttributes(CharVarcharUtils.replaceCharVarcharWithStringInSchema(info.schema))
+
+  // Render `info` in plan-tree output as the qualified view name. The default case-class
+  // `toString` would format `info` via `Object.toString`, which produces `V1ViewInfo@<hash>`
+  // for the v1 leg and a similarly opaque hash for the v2 leg -- non-deterministic and useless
+  // in EXPLAIN / golden file output. Replace it with the multi-part `catalog.namespace.name`
+  // form so EXPLAIN, plan-tree dumps, and `SQLQueryTestSuite` golden files remain stable.
+  override protected def stringArgs: Iterator[Any] =
+    Iterator(catalog, identifier, (catalog.name +: identifier.namespace :+ identifier.name).quoted)
 }
 
 /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index cd4a5645151b6..1aa5c483db88b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -394,7 +394,9 @@ class InMemoryCatalog(
 
   override def listViews(db: String, pattern: String): Seq[String] = synchronized {
     requireDbExists(db)
-    val views = catalog(db).tables.filter(_._2.table.tableType == CatalogTableType.VIEW).keySet
+    val views = catalog(db).tables.filter { case (_, t) =>
+      t.table.isViewLike
+    }.keySet
     StringUtils.filterPattern(views.toSeq.sorted, pattern)
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
index 07ca0a8712485..25ce823337eff 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
@@ -122,7 +122,10 @@ case class SQLFunction(
    * Convert the SQL function to a [[CatalogFunction]].
    */
   def toCatalogFunction: CatalogFunction = {
-    val props = sqlFunctionToProps ++ properties
+    // Persist function metadata (owner, createTime) alongside the SQL function
+    // body so the values survive a session restart and can be rendered by
+    // DESCRIBE FUNCTION EXTENDED.
+    val props = sqlFunctionToProps ++ functionMetadataToProps ++ properties
     CatalogFunction(
       identifier = name,
       className = SQL_FUNCTION_PREFIX,
@@ -187,6 +190,9 @@ case class SQLFunction(
 
 object SQLFunction {
 
+  val SCALAR = "SCALAR"
+  val TABLE = "TABLE"
+
   /**
    * Persisted frozen PATH for SQL function bodies when created with [[SQLConf.PATH_ENABLED]].
    * Serialized as a JSON array of path entries (same format as
@@ -227,21 +233,7 @@ object SQLFunction {
       }
       val blob = parts.sortBy(_._1).map(_._2).mkString
       val props = mapper.readValue(blob, classOf[Map[String, String]])
-      val isTableFunc = props(IS_TABLE_FUNC).toBoolean
-      val collation = props.get(COLLATION)
-      val returnType = parseReturnTypeText(props(RETURN_TYPE), isTableFunc, parser, collation)
-      SQLFunction(
-        name = function.identifier,
-        inputParam = props.get(INPUT_PARAM).map(parseRoutineParam(_, parser, collation)),
-        returnType = returnType.get,
-        exprText = props.get(EXPRESSION),
-        queryText = props.get(QUERY),
-        comment = props.get(COMMENT),
-        collation = collation,
-        deterministic = props.get(DETERMINISTIC).map(_.toBoolean),
-        containsSQL = props.get(CONTAINS_SQL).map(_.toBoolean),
-        isTableFunc = isTableFunc,
-        props.filterNot(_._1.startsWith(SQL_FUNCTION_PREFIX)))
+      fromProps(props, function.identifier, parser)
     } catch {
       case e: Exception =>
         throw new AnalysisException(
@@ -253,6 +245,56 @@ object SQLFunction {
     }
   }
 
+  /**
+   * Convert an [[ExpressionInfo]] into a SQL function.
+   */
+  def fromExpressionInfo(info: ExpressionInfo, parser: ParserInterface): SQLFunction = {
+    try {
+      val props = mapper.readValue(info.getUsage, classOf[Map[String, String]])
+      fromProps(props, FunctionIdentifier(info.getName, Option(info.getDb)), parser)
+    } catch {
+      case e: Exception =>
+        throw new AnalysisException(
+          errorClass = "CORRUPTED_CATALOG_FUNCTION",
+          messageParameters = Map(
+            "identifier" -> s"${info.getDb}.${info.getName}",
+            "className" -> s"${info.getClassName}"), cause = Some(e)
+        )
+    }
+  }
+
+  /**
+   * Build a [[SQLFunction]] from a deserialized property map and a function identifier.
+   * Shared by both [[fromCatalogFunction]] and [[fromExpressionInfo]] so all readers
+   * stay in sync as new properties are added.
+   *
+   * `OWNER` is optional and defaults to `None` when missing; `CREATE_TIME` falls back
+   * to the current wall-clock time so functions persisted before metadata was added
+   * to the catalog payload still load.
+   */
+  private def fromProps(
+      props: Map[String, String],
+      identifier: FunctionIdentifier,
+      parser: ParserInterface): SQLFunction = {
+    val isTableFunc = props(IS_TABLE_FUNC).toBoolean
+    val collation = props.get(COLLATION)
+    val returnType = parseReturnTypeText(props(RETURN_TYPE), isTableFunc, parser, collation)
+    SQLFunction(
+      name = identifier,
+      inputParam = props.get(INPUT_PARAM).map(parseRoutineParam(_, parser, collation)),
+      returnType = returnType.get,
+      exprText = props.get(EXPRESSION),
+      queryText = props.get(QUERY),
+      comment = props.get(COMMENT),
+      collation = collation,
+      deterministic = props.get(DETERMINISTIC).map(_.toBoolean),
+      containsSQL = props.get(CONTAINS_SQL).map(_.toBoolean),
+      isTableFunc = isTableFunc,
+      properties = props.filterNot(_._1.startsWith(SQL_FUNCTION_PREFIX)),
+      owner = props.get(OWNER),
+      createTimeMs = props.get(CREATE_TIME).map(_.toLong).getOrElse(System.currentTimeMillis))
+  }
+
   def parseDefault(text: String, parser: ParserInterface): Expression = {
     parser.parseExpression(text)
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index ff4a135b7d044..fd700f1ee0130 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -60,6 +60,13 @@ import org.apache.spark.util.Utils
 object SessionCatalog {
   val DEFAULT_DATABASE = "default"
 
+  /**
+   * Metadata key marking an Alias / Attribute as originating from a SQL UDF input parameter.
+   * Consumed by name resolution: a parameterless built-in function takes precedence over a
+   * tagged alias of the same name.
+   */
+  val SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY: String = "__funcInputAlias"
+
   /**
    * Kind of session-scoped function namespace for lookup/resolve.
    * Used by the kind-based API to avoid separate methods per
@@ -113,18 +120,76 @@ class SessionCatalog(
     identifier.copy(funcName = "") == SESSION_NAMESPACE_TEMPLATE
 
   /**
-   * Session function kinds in resolution order for unqualified lookups.
-   * Matches [[SQLConf.sessionFunctionResolutionOrder]]: "first" (session first),
-   * "second" (default), "last" (builtin only; session tried after persistent).
+   * When set, unqualified builtin/temp function resolution uses this fixed kind order instead of
+   * [[catalogManagerForSessionFunctionKinds]] / [[SQLConf.defaultPathOrder]]. For unit tests only;
+   * production relies on the catalog manager binding.
    */
-  private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] = {
-    conf.sessionFunctionResolutionOrder match {
-      case "first" => Seq(Temp, Builtin)
-      case "last" => Seq(Builtin)
-      case _ => Seq(Builtin, Temp) // "second" (default)
-    }
+  @volatile private var sessionFunctionKindsTestOverride: Option[Seq[SessionFunctionKind]] = None
+
+  /**
+   * Live PATH for session function kinds. Set from
+   * [[org.apache.spark.sql.connector.catalog.DefaultCatalogManager]]'s constructor via
+   * [[bindCatalogManagerForSessionFunctionKinds]] so unqualified lookups and the security check
+   * that blocks temp functions from shadowing builtins read the effective SQL PATH (post-`SET
+   * PATH`, with [[SQLConf.DEFAULT_PATH]] and [[SQLConf.defaultPathOrder]] fallbacks already
+   * applied).
+   *
+   * When unset (e.g. standalone [[SessionCatalog]] in tests), kinds derive from
+   * [[SQLConf.defaultPathOrder]] with no catalog entries -- equivalent to the system-namespace
+   * entries of the spark-built-in default path. This includes both `system.builtin` and
+   * `system.session` so unqualified temp functions are still resolvable in test setups.
+   */
+  @volatile private var catalogManagerForSessionFunctionKinds: Option[CatalogManager] = None
+
+  /**
+   * Wire live PATH-derived session function kinds from the session [[CatalogManager]].
+   * Called once from [[org.apache.spark.sql.connector.catalog.DefaultCatalogManager]]'s
+   * constructor.
+   */
+  private[sql] def bindCatalogManagerForSessionFunctionKinds(cm: CatalogManager): Unit = {
+    catalogManagerForSessionFunctionKinds = Some(cm)
+  }
+
+  /**
+   * Pin session function kinds for tests (`None` clears). Uses `private[sql]` so tests under the
+   * `org.apache.spark.sql` package can control ordering without a public catalog API.
+   */
+  private[sql] def setSessionFunctionKindsTestOverride(
+      kinds: Option[Seq[SessionFunctionKind]]): Unit = {
+    sessionFunctionKindsTestOverride = kinds
   }
 
+  /**
+   * Session function kinds in resolution order for unqualified lookups: test override if set,
+   * else live PATH from [[catalogManagerForSessionFunctionKinds]], else
+   * [[SQLConf.defaultPathOrder]] with no catalog entries (so `system.builtin` and
+   * `system.session` are both reachable in standalone test mode).
+   *
+   * MUST NOT be called while holding [[SessionCatalog]]'s intrinsic lock (see SPARK-56939):
+   * the path-driven branch delegates to [[CatalogManager]], which has its own intrinsic lock
+   * and re-enters this catalog through `USE` paths, so nesting the two locks here would
+   * deadlock.
+   */
+  private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] =
+    sessionFunctionKindsTestOverride.getOrElse {
+      catalogManagerForSessionFunctionKinds match {
+        case Some(cm) =>
+          // Use the consolidated helper so unqualified resolution observes a consistent
+          // (currentCatalog, currentNamespace, path) triple in a single critical section.
+          cm.sessionFunctionKindsForUnqualifiedResolution()
+        case None =>
+          CatalogManager.systemFunctionKindsFromPath(conf.defaultPathOrder(Seq.empty))
+      }
+    }
+
+  /**
+   * True iff the effective SQL PATH searches `system.session` before `system.builtin`. Used
+   * to gate the security check that blocks temporary functions from silently shadowing a
+   * builtin of the same name.
+   */
+  private def sessionFirstInPath: Boolean =
+    sessionFunctionKindsInResolutionOrder.headOption.contains(Temp)
+
   /**
    * Checks if a namespace represents temporary functions.
    */
@@ -1054,10 +1119,15 @@ class SessionCatalog(
   def getRelation(
       metadata: CatalogTable,
       options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = {
-    val qualifiedIdent = qualifyIdentifier(metadata.identifier)
-    val db = qualifiedIdent.database.get
-    val table = qualifiedIdent.table
-    val multiParts = Seq(CatalogManager.SESSION_CATALOG_NAME, db, table)
+    // Prefer `multipartIdentifier` (set by non-session v2 catalogs via `V1Table.toCatalogTable`)
+    // so the SubqueryAlias qualifier reflects the real catalog + multi-part namespace.
+    // Fall back to the historical 3-part form for v1 session-catalog tables -- we intentionally
+    // always include `SESSION_CATALOG_NAME` here and ignore
+    // `LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME` to preserve pre-v2-MetadataTable behavior.
+    val multiParts = metadata.multipartIdentifier.getOrElse {
+      val qualifiedIdent = qualifyIdentifier(metadata.identifier)
+      Seq(CatalogManager.SESSION_CATALOG_NAME, qualifiedIdent.database.get, qualifiedIdent.table)
+    }
 
     if (CatalogTable.isMetricView(metadata)) {
       parseMetricViewDefinition(metadata)
@@ -1251,7 +1321,7 @@ class SessionCatalog(
       import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
       val ident = nameParts.asTableIdentifier
       try {
-        getTempViewOrPermanentTableMetadata(ident).tableType == CatalogTableType.VIEW
+        getTempViewOrPermanentTableMetadata(ident).isViewLike
       } catch {
         case _: NoSuchTableException => false
         case _: NoSuchNamespaceException => false
@@ -1817,11 +1887,6 @@ class SessionCatalog(
       name: String,
       function: SQLFunction,
       input: Seq[Expression]): LogicalPlan = {
-    def metaForFuncInputAlias = {
-      new MetadataBuilder()
-        .putString("__funcInputAlias", "true")
-        .build()
-    }
     assert(!function.isTableFunc,
       "Function '" + function.name + "' is a table function. " +
       "Use makeSQLTableFunctionPlan() instead of makeSQLFunctionPlan().")
@@ -1878,6 +1943,9 @@ class SessionCatalog(
             }
           }
 
+        val funcInputMetadata = new MetadataBuilder()
+          .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true)
+          .build()
         paddedInput.zip(param.fields).map {
           case (expr, param) =>
             // Add outer references to all resolved attributes and outer references in the function
@@ -1887,10 +1955,11 @@ class SessionCatalog(
               case a: Attribute if a.resolved => OuterReference(a)
               case o: OuterReference => OuterReference(o)
             }
+            // Mark the alias as function input so name resolution can give a parameterless
+            // built-in function precedence over a same-named UDF parameter.
             Alias(Cast(outer, param.dataType), param.name)(
               qualifier = qualifier,
-              // mark the alias as function input
-              explicitMetadata = Some(metaForFuncInputAlias))
+              explicitMetadata = Some(funcInputMetadata))
         }
       }.getOrElse(Nil)
 
@@ -1983,6 +2052,10 @@ class SessionCatalog(
           val outer = expr.transform {
             case a: Attribute => OuterReference(a)
           }
+          // No SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY marker here: a table UDF body references
+          // its parameter as an outer reference (the param lives in the lateral join's left
+          // child), so resolveColumnByName returns None and a parameterless built-in function
+          // already wins via the pre-existing "function beats outer reference" precedence.
           Alias(Cast(outer, param.dataType), param.name)(qualifier = qualifier)
       }
       val inputPlan = Project(inputCast, OneRowRelation())
@@ -2054,14 +2127,15 @@ class SessionCatalog(
       overrideIfExists: Boolean,
       functionBuilder: Option[FunctionBuilder] = None): Unit = {
     val builder = functionBuilder.getOrElse(makeFunctionBuilder(funcDefinition))
-    registerFunction(funcDefinition, overrideIfExists, functionRegistry, builder)
+    registerFunction(funcDefinition, overrideIfExists, functionRegistry, builder, info = None)
   }
 
   private def registerFunction[T](
       funcDefinition: CatalogFunction,
       overrideIfExists: Boolean,
       registry: FunctionRegistryBase[T],
-      functionBuilder: FunctionRegistryBase[T]#FunctionBuilder): Unit = {
+      functionBuilder: FunctionRegistryBase[T]#FunctionBuilder,
+      info: Option[ExpressionInfo]): Unit = {
     val func = funcDefinition.identifier
 
     // Determine the key to use for registration:
@@ -2075,12 +2149,11 @@ class SessionCatalog(
       qualifyIdentifier(func)
     }
 
-    // Security check: When legacy mode is enabled, block SQL-created temporary functions
-    // from shadowing builtin functions (to preserve master behavior)
-    // Scala UDFs are still allowed to shadow in legacy mode
-    // We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists
-    val sessionFirst = conf.sessionFunctionResolutionOrder == "first"
-    if (func.database.isEmpty && sessionFirst && !overrideIfExists) {
+    // Security check: when the effective SQL PATH searches `system.session` before
+    // `system.builtin`, block creating an unqualified temporary function whose name
+    // collides with a builtin so it cannot silently shadow that builtin via unqualified
+    // resolution. We throw ROUTINE_ALREADY_EXISTS to indicate the conflict.
+    if (func.database.isEmpty && sessionFirstInPath && !overrideIfExists) {
       val funcName = func.funcName
       // Check if function exists in builtin namespace (extensions are stored as builtins)
       val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName)
@@ -2093,8 +2166,18 @@ class SessionCatalog(
     if (registry.functionExists(identToRegister) && !overrideIfExists) {
       throw QueryCompilationErrors.functionAlreadyExistsError(func)
     }
-    val info = makeExprInfoForHiveFunction(funcDefinition)
-    registry.registerFunction(identToRegister, info, functionBuilder)
+    // Prefer caller-supplied info (the freshly-registered SQL UDF path passes a
+    // structured ExpressionInfo). Otherwise reconstruct one: SQL UDFs need the
+    // structured `usage` blob so DESCRIBE FUNCTION can rehydrate them; hive-style
+    // functions get the legacy info with `usage = null`.
+    val resolvedInfo = info.getOrElse {
+      if (funcDefinition.isUserDefinedFunction) {
+        UserDefinedFunction.fromCatalogFunction(funcDefinition, parser).toExpressionInfo
+      } else {
+        makeExprInfoForHiveFunction(funcDefinition)
+      }
+    }
+    registry.registerFunction(identToRegister, resolvedInfo, functionBuilder)
   }
 
   private def makeExprInfoForHiveFunction(func: CatalogFunction): ExpressionInfo = {
@@ -2190,10 +2273,11 @@ class SessionCatalog(
       // Use FunctionIdentifier with session namespace for temporary functions
       val tempIdentifier = tempFunctionIdentifier(function.name.funcName)
 
-      // Security check: When legacy mode is enabled, block SQL-created temporary functions
-      // from shadowing builtin functions (including extensions) as a safeguard
-      // We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists
-      if ((conf.sessionFunctionResolutionOrder == "first") && !overrideIfExists) {
+      // Security check: when the effective SQL PATH searches `system.session` before
+      // `system.builtin`, block creating an unqualified temporary function whose name
+      // collides with a builtin (including extensions) so it cannot silently shadow that
+      // builtin via unqualified resolution.
+      if (sessionFirstInPath && !overrideIfExists) {
         val funcName = function.name.funcName
         // Check if function exists in builtin namespace (extensions are stored as builtins)
         val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName)
@@ -2225,11 +2309,16 @@ class SessionCatalog(
       val info = function.toExpressionInfo
       registry.registerFunction(tempIdentifier, info, functionBuilder)
     } else {
+      // We already have the UserDefinedFunction in hand, so skip the
+      // CatalogFunction -> ExpressionInfo round trip inside `registerFunction`
+      // and pass the structured ExpressionInfo (with owner/createTime preserved
+      // at CREATE-time values) directly to the registry.
       registerFunction(
         function.toCatalogFunction,
         overrideIfExists,
         registry,
-        functionBuilder)
+        functionBuilder,
+        info = Some(function.toExpressionInfo))
     }
   }
 
@@ -2494,7 +2583,14 @@ class SessionCatalog(
    * Look up the `ExpressionInfo` of the given function by name.
    * Resolution order follows the configured path (e.g. builtin then session).
    */
-  def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = synchronized {
+  def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = {
+    // Intentionally not `synchronized` on this [[SessionCatalog]]: resolution order may call
+    // into [[CatalogManager]] (e.g. [[CatalogManager.sqlResolutionPathEntries]] via
+    // [[sessionFunctionKindsInResolutionOrder]]), which synchronizes on the manager. The
+    // SPARK-56939 fix removed the reverse `CatalogManager -> SessionCatalog` nest from the
+    // `USE`-style mutators that previously closed the deadlock cycle; keeping this method
+    // un-synchronized preserves the `SessionCatalog -> CatalogManager` direction as the
+    // single allowed ordering, so the invariant survives future regressions.
     lookupFunctionWithShadowing(name, tableFunctionRegistry, checkBuiltinOperators = false)
   }
 
@@ -2585,7 +2681,8 @@ class SessionCatalog(
                 funcMetadata,
                 overrideIfExists = false,
                 functionRegistry,
-                makeFunctionBuilder(funcMetadata))
+                makeFunctionBuilder(funcMetadata),
+                info = None)
             }
             functionRegistry.lookupFunctionBuilder(qualifiedIdent).get
           }
@@ -2645,7 +2742,12 @@ class SessionCatalog(
   /**
    * Look up the [[ExpressionInfo]] associated with the specified function, assuming it exists.
    */
-  def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = synchronized {
+  def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = {
+    // Intentionally not `synchronized` on this [[SessionCatalog]] (see
+    // [[lookupBuiltinOrTempTableFunction]]): unqualified builtin/temp resolution uses
+    // [[sessionFunctionKindsInResolutionOrder]] / [[CatalogManager]], and SPARK-56939
+    // requires this catalog's intrinsic lock to NEVER be held when reaching into
+    // [[CatalogManager]] from a function-resolution path.
     if (name.database.isEmpty) {
       lookupBuiltinOrTempFunction(name.funcName)
         .orElse(lookupBuiltinOrTempTableFunction(name.funcName))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala
index 4c7d8db6604b6..e9edd45fae514 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/VariableManager.scala
@@ -24,6 +24,7 @@ import scala.collection.mutable
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{FakeSystemCatalog, ResolvedIdentifier}
 import org.apache.spark.sql.catalyst.expressions.Literal
+import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.connector.catalog.{CatalogManager, Identifier}
 import org.apache.spark.sql.connector.catalog.CatalogManager.{SESSION_NAMESPACE, SYSTEM_CATALOG_NAME}
 import org.apache.spark.sql.errors.DataTypeErrorsBase
@@ -49,8 +50,11 @@ trait VariableManager {
  *
    * @param nameParts Name parts of the variable.
    * @param varDef The new VariableDefinition of the variable.
+   * @param origin Origin of the SET reference, used in
+   *               [[org.apache.spark.sql.errors.QueryCompilationErrors.unresolvedVariableError]]
+   *               if the variable is unexpectedly absent at execution time.
    */
-  def set(nameParts: Seq[String], varDef: VariableDefinition): Unit
+  def set(nameParts: Seq[String], varDef: VariableDefinition, origin: Origin): Unit
 
 /**
  * Get an existing variable.
@@ -130,11 +134,14 @@ class TempVariableManager extends VariableManager with DataTypeErrorsBase {
     variables.put(name, varDef)
   }
 
-  override def set(nameParts: Seq[String], varDef: VariableDefinition): Unit = synchronized {
+  override def set(
+      nameParts: Seq[String],
+      varDef: VariableDefinition,
+      origin: Origin): Unit = synchronized {
     val name = nameParts.last
     // Sanity check as this is already checked in ResolveSetVariable.
     if (!variables.contains(name)) {
-      throw unresolvedVariableError(nameParts, Seq("SYSTEM", "SESSION"))
+      throw unresolvedVariableError(nameParts, Seq(Seq("SYSTEM", "SESSION")), origin)
     }
     variables.put(name, varDef)
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 1cc4f7bcc3d29..6dda153985e56 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -445,11 +445,30 @@ case class CatalogTable(
     tracksPartitionsInCatalog: Boolean = false,
     schemaPreservesCase: Boolean = true,
     ignoredProperties: Map[String, String] = Map.empty,
-    viewOriginalText: Option[String] = None)
+    viewOriginalText: Option[String] = None,
+    // Multi-part identifier [catalog, namespace..., name] for tables synthesized from a v2
+    // `MetadataTable` whose namespace has more than one part -- the v1 `identifier:
+    // TableIdentifier` (single-string database) cannot carry that losslessly. `None` for
+    // v1-native tables; callers should use `fullIdent` which falls back to `identifier.nameParts`.
+    multipartIdentifier: Option[Seq[String]] = None)
   extends MetadataMapSupport {
 
   import CatalogTable._
 
+  /**
+   * The fully-qualified multi-part identifier. Prefers `multipartIdentifier` when set (v2-sourced
+   * tables with multi-level namespaces); otherwise reconstructs from `identifier.nameParts`.
+   */
+  def fullIdent: Seq[String] = multipartIdentifier.getOrElse(identifier.nameParts)
+
+  /**
+   * Returns whether this table behaves like a view at resolution / DDL time. Today: VIEW or
+   * METRIC_VIEW. Forks may extend this set with additional view-like types, so call sites
+   * that need a uniform "is this view-like?" check should prefer this helper over inline
+   * disjunctions on `tableType`.
+   */
+  def isViewLike: Boolean = CatalogTable.isViewLike(tableType)
+
   /**
    * schema of this table's partition columns
    */
@@ -544,20 +563,7 @@ case class CatalogTable(
    * Return the schema binding mode. Defaults to SchemaBinding if not a view or an older
    * version, unless the viewSchemaBindingMode config is set to false
    */
-  def viewSchemaMode: ViewSchemaMode = {
-    if (!SQLConf.get.viewSchemaBindingEnabled) {
-      SchemaUnsupported
-    } else {
-      val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString)
-      schemaMode match {
-        case SchemaBinding.toString => SchemaBinding
-        case SchemaEvolution.toString => SchemaEvolution
-        case SchemaTypeEvolution.toString => SchemaTypeEvolution
-        case SchemaCompensation.toString => SchemaCompensation
-        case other => throw SparkException.internalError("Unexpected ViewSchemaMode")
-      }
-    }
-  }
+  def viewSchemaMode: ViewSchemaMode = CatalogTable.viewSchemaModeFromProperties(properties)
 
   /**
    * Return temporary view names the current view was referred. should be empty if the
@@ -704,6 +710,15 @@ case class CatalogTable(
       if (viewQueryOutputColumns != JNull) {
         map += "View Query Output Columns" -> viewQueryOutputColumns
       }
+    } else if (tableType == CatalogTableType.METRIC_VIEW) {
+      // METRIC_VIEW stores a YAML body in `viewText`, not a SQL query. The schema-binding
+      // fields used by plain VIEW (View Schema Mode, View Catalog and Namespace, SQL Path,
+      // View Query Output Columns) do not apply, so emit only `View Text` plus a `Language`
+      // tag so consumers can dispatch on the view_text format.
+      if (viewText.isDefined) {
+        map += "View Text" -> JString(viewText.get)
+      }
+      map += "Language" -> JString("YAML")
     }
     if (tableProperties != JNull) map += "Table Properties" -> tableProperties
     stats.foreach { s =>
@@ -744,15 +759,30 @@ object CatalogTable {
   val VIEW_CATALOG_AND_NAMESPACE = VIEW_PREFIX + "catalogAndNamespace.numParts"
   val VIEW_CATALOG_AND_NAMESPACE_PART_PREFIX = VIEW_PREFIX + "catalogAndNamespace.part."
 
-  // Property to indicate that a VIEW is actually a METRIC VIEW
-  val VIEW_WITH_METRICS = VIEW_PREFIX + "viewWithMetrics"
+  /**
+   * View sub-type marker persisted in `properties` so the metric-view distinction survives a
+   * round-trip through external catalogs whose enum can't carry it (e.g. the Hive Metastore,
+   * which only knows `VIRTUAL_VIEW`). When this property is set, the in-memory `tableType`
+   * upgrades from [[CatalogTableType.VIEW]] back to [[CatalogTableType.METRIC_VIEW]] on read.
+   */
+  val VIEW_SUB_TYPE = VIEW_PREFIX + "subType"
+  val VIEW_SUB_TYPE_METRIC_VIEW = "METRIC_VIEW"
 
   /**
-   * Check if a CatalogTable is a metric view by looking at its properties.
+   * Check if a CatalogTable is a metric view.
    */
   def isMetricView(table: CatalogTable): Boolean = {
-    table.tableType == CatalogTableType.VIEW &&
-      table.properties.get(VIEW_WITH_METRICS).contains("true")
+    table.tableType == CatalogTableType.METRIC_VIEW
+  }
+
+  /**
+   * Type-only form of [[CatalogTable.isViewLike]]; returns whether the given table type
+   * behaves like a view at resolution / DDL time. Use this overload when you have a
+   * [[CatalogTableType]] but no surrounding [[CatalogTable]] (e.g. inside `match`/`case`
+   * patterns or [[org.apache.spark.sql.catalyst.catalog.SessionCatalog.isView]]).
+   */
+  def isViewLike(tableType: CatalogTableType): Boolean = {
+    tableType == CatalogTableType.VIEW || tableType == CatalogTableType.METRIC_VIEW
   }
 
   // Convert the current catalog and namespace to properties.
@@ -789,6 +819,26 @@ object CatalogTable {
 
   val PROP_CLUSTERING_COLUMNS: String = "clusteringColumns"
 
+  /**
+   * Decode the view schema binding mode from a properties map. Shared between
+   * [[CatalogTable.viewSchemaMode]] and the v2 ALTER VIEW path which reads the mode directly
+   * from the existing view's [[TableInfo]] properties without materializing a full CatalogTable.
+   */
+  def viewSchemaModeFromProperties(properties: Map[String, String]): ViewSchemaMode = {
+    if (!SQLConf.get.viewSchemaBindingEnabled) {
+      SchemaUnsupported
+    } else {
+      val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString)
+      schemaMode match {
+        case SchemaBinding.toString => SchemaBinding
+        case SchemaEvolution.toString => SchemaEvolution
+        case SchemaTypeEvolution.toString => SchemaTypeEvolution
+        case SchemaCompensation.toString => SchemaCompensation
+        case _ => throw SparkException.internalError("Unexpected ViewSchemaMode")
+      }
+    }
+  }
+
   def splitLargeTableProp(
       key: String,
       value: String,
@@ -1071,8 +1121,9 @@ object CatalogTableType {
   val EXTERNAL = new CatalogTableType("EXTERNAL")
   val MANAGED = new CatalogTableType("MANAGED")
   val VIEW = new CatalogTableType("VIEW")
+  val METRIC_VIEW = new CatalogTableType("METRIC_VIEW")
 
-  val tableTypes = Seq(EXTERNAL, MANAGED, VIEW)
+  val tableTypes = Seq(EXTERNAL, MANAGED, VIEW, METRIC_VIEW)
 }
 
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala
index 0fce96c159979..c8f51362a37aa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.types.{PhysicalBinaryType, PhysicalIntegerT
 import org.apache.spark.sql.catalyst.types.ops.TypeOps
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
 import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, DataType, DateType, DayTimeIntervalType, Decimal, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, ObjectType, ShortType, StringType, StructType, TimestampNTZType, TimestampType, TimeType, UserDefinedType, VariantType, YearMonthIntervalType}
-import org.apache.spark.unsafe.types.{CalendarInterval, GeographyVal, GeometryVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{BinaryView, CalendarInterval, UTF8String, VariantVal}
 
 /**
  * :: DeveloperApi ::
@@ -111,8 +111,7 @@ object EncoderUtils {
       case _: StructType => classOf[InternalRow]
       case _: ArrayType => classOf[ArrayData]
       case _: MapType => classOf[MapData]
-      case _: GeographyType => classOf[GeographyVal]
-      case _: GeometryType => classOf[GeometryVal]
+      case _: GeographyType | _: GeometryType => classOf[BinaryView]
       case ObjectType(cls) => cls
       case _ => typeJavaMapping.getOrElse(dt, classOf[java.lang.Object])
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
index 2340385dcdd66..f1cb20ca40619 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import scala.annotation.tailrec
 
-import org.apache.spark.sql.catalyst.analysis.MultiAlias
+import org.apache.spark.sql.catalyst.analysis.{MultiAlias, UnresolvedFunction}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Project}
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
@@ -112,6 +112,10 @@ trait AliasHelper {
   }
 
   protected def trimAliases(e: Expression): Expression = e match {
+    // SPARK-48091: Do not descend into unresolved function calls. Aliases inside them
+    // (e.g., UnresolvedFunction("struct", Seq(Alias(x, "data")))) carry semantic information
+    // that ResolveFunctions -> CreateStruct.apply consumes to produce field names.
+    case u: UnresolvedFunction => u
     // The children of `CreateNamedStruct` may use `Alias` to carry metadata and we should not
     // trim them.
     case c: CreateNamedStruct => c.mapChildren {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c51d3508d04a4..1e173c5c23d73 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte,
 import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal}
 import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper}
 import org.apache.spark.util.ArrayImplicits._
 
@@ -1211,13 +1211,13 @@ case class Cast(
     case _: GeographyType =>
       identity
     case _: GeometryType =>
-      buildCast[GeometryVal](_, STUtils.geometryToGeography)
+      buildCast[BinaryView](_, STUtils.geometryToGeography)
   }
 
   // GeometryConverter
   private[this] def castToGeometry(from: DataType): Any => Any = from match {
     case _: GeographyType =>
-      buildCast[GeographyVal](_, STUtils.geographyToGeometry)
+      buildCast[BinaryView](_, STUtils.geographyToGeometry)
     case _: GeometryType =>
       identity
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala
index b65576403e9d8..1c33ed65c1df1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, LogicalPlan}
 import org.apache.spark.sql.catalyst.trees.TreePattern._
 import org.apache.spark.sql.catalyst.trees.UnaryLike
@@ -90,7 +91,7 @@ case class DynamicPruningSubquery(
     copy(
       pruningKey = pruningKey.canonicalized,
       buildQuery = buildQuery.canonicalized,
-      buildKeys = buildKeys.map(_.canonicalized),
+      buildKeys = buildKeys.map(QueryPlan.normalizeExpressions(_, buildQuery.output)),
       exprId = ExprId(0))
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala
index 53b3e0598d586..86cf40cd9b043 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala
@@ -163,9 +163,7 @@ object InterpretedUnsafeProjection {
 
         case _: PhysicalStringType => (v, i) => writer.write(i, v.getUTF8String(i))
 
-        case _: PhysicalGeographyType => (v, i) => writer.write(i, v.getGeography(i))
-
-        case _: PhysicalGeometryType => (v, i) => writer.write(i, v.getGeometry(i))
+        case _: PhysicalBinaryViewType => (v, i) => writer.write(i, v.getBinaryView(i))
 
         case PhysicalVariantType => (v, i) => writer.write(i, v.getVariant(i))
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala
index 4211dd5e4df01..f05a80cf7f43c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala
@@ -114,11 +114,8 @@ class JoinedRow extends InternalRow {
   override def getBinary(i: Int): Array[Byte] =
     if (i < row1.numFields) row1.getBinary(i) else row2.getBinary(i - row1.numFields)
 
-  override def getGeography(i: Int): GeographyVal =
-    if (i < row1.numFields) row1.getGeography(i) else row2.getGeography(i - row1.numFields)
-
-  override def getGeometry(i: Int): GeometryVal =
-    if (i < row1.numFields) row1.getGeometry(i) else row2.getGeometry(i - row1.numFields)
+  override def getBinaryView(i: Int): BinaryView =
+    if (i < row1.numFields) row1.getBinaryView(i) else row2.getBinaryView(i - row1.numFields)
 
   override def getArray(i: Int): ArrayData =
     if (i < row1.numFields) row1.getArray(i) else row2.getArray(i - row1.numFields)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala
index d747bebd5cfe6..c561677ed5ad5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala
@@ -26,12 +26,13 @@ import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper}
 import org.apache.spark.sql.catalyst.analysis.{NoSuchFunctionException, UnresolvedAttribute}
 import org.apache.spark.sql.catalyst.encoders.EncoderUtils
 import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke}
-import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, SampleMethod}
 import org.apache.spark.sql.connector.catalog.{FunctionCatalog, Identifier}
 import org.apache.spark.sql.connector.catalog.functions._
 import org.apache.spark.sql.connector.catalog.functions.ScalarFunction.MAGIC_METHOD_NAME
 import org.apache.spark.sql.connector.expressions.{BucketTransform, Cast => V2Cast, Expression => V2Expression, FieldReference, GeneralScalarExpression, IdentityTransform, Literal => V2Literal, NamedReference, NamedTransform, NullOrdering => V2NullOrdering, SortDirection => V2SortDirection, SortOrder => V2SortOrder, SortValue, Transform}
 import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue}
+import org.apache.spark.sql.connector.read.{SampleMethod => V2SampleMethod}
 import org.apache.spark.sql.errors.DataTypeErrors.toSQLId
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.internal.connector.PartitionPredicateImpl
@@ -168,6 +169,11 @@ object V2ExpressionUtils extends SQLConfHelper with Logging {
     case V2NullOrdering.NULLS_LAST => NullsLast
   }
 
+  def toCatalyst(sampleMethod: V2SampleMethod): SampleMethod = sampleMethod match {
+    case V2SampleMethod.BERNOULLI => SampleMethod.Bernoulli
+    case V2SampleMethod.SYSTEM => SampleMethod.System
+  }
+
   def resolveScalarFunction(
       scalarFunc: ScalarFunction[_],
       arguments: Seq[Expression]): Expression = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
index ee06147b03944..052b60fdd41fd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.optimizer.NormalizeFloatingNumbers
 import org.apache.spark.sql.catalyst.trees.UnaryLike
 import org.apache.spark.sql.catalyst.types.PhysicalDataType
 import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils, UnsafeRowUtils}
@@ -206,6 +207,10 @@ case class CollectSet(
 
   override lazy val bufferElementType = child.dataType match {
     case BinaryType => ArrayType(ByteType)
+    // Float/double are keyed by their bit pattern (see convertToBufferElement), so the
+    // buffer holds the integral bits; eval() converts them back to float/double.
+    case DoubleType => LongType
+    case FloatType => IntegerType
     case other => other
   }
 
@@ -222,6 +227,12 @@ case class CollectSet(
     buffer
   }
 
+  @transient private lazy val complexNormalizer: Any => Any = {
+    val ref = BoundReference(0, child.dataType, nullable = true)
+    val proj = UnsafeProjection.create(NormalizeFloatingNumbers.normalize(ref))
+    (value: Any) => InternalRow.copyValue(proj(InternalRow(value)).get(0, child.dataType))
+  }
+
   override def convertToBufferElement(value: Any): Any = child.dataType match {
     /*
      * collect_set() of BinaryType should not return duplicate elements,
@@ -229,6 +240,17 @@ case class CollectSet(
      * so we need to use a different catalyst value for arrays
      */
     case BinaryType => UnsafeArrayData.fromPrimitiveArray(value.asInstanceOf[Array[Byte]])
+    // mutable.HashSet[Any] compares boxed Double/Float with IEEE equality, where NaN != NaN,
+    // so normalizing the value alone wouldn't collapse NaNs - keying on doubleToLongBits/
+    // floatToIntBits does (and the NORMALIZER step keeps -0.0/0.0 deduped). Complex types
+    // instead dedup on a normalized UnsafeRow's binary form.
+    case DoubleType =>
+      java.lang.Double.doubleToLongBits(
+        NormalizeFloatingNumbers.DOUBLE_NORMALIZER(value).asInstanceOf[Double])
+    case FloatType =>
+      java.lang.Float.floatToIntBits(
+        NormalizeFloatingNumbers.FLOAT_NORMALIZER(value).asInstanceOf[Float])
+    case dt if NormalizeFloatingNumbers.needNormalize(dt) => complexNormalizer(value)
     case _ => InternalRow.copyValue(value)
   }
 
@@ -239,6 +261,16 @@ case class CollectSet(
           case null => null
           case v => v.asInstanceOf[ArrayData].toByteArray()
         }.toArray[Any]
+      case DoubleType =>
+        buffer.iterator.map {
+          case null => null
+          case v => java.lang.Double.longBitsToDouble(v.asInstanceOf[Long])
+        }.toArray[Any]
+      case FloatType =>
+        buffer.iterator.map {
+          case null => null
+          case v => java.lang.Float.intBitsToFloat(v.asInstanceOf[Int])
+        }.toArray[Any]
       case _ => buffer.toArray
     }
     new GenericArrayData(array)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/kllAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/kllAggregates.scala
index 6e3ea19425d9c..80ac45d273afa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/kllAggregates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/kllAggregates.scala
@@ -485,7 +485,7 @@ case class KllSketchAggDouble(
        6
   """,
   group = "agg_funcs",
-  since = "4.1.0")
+  since = "4.1.2")
 // scalastyle:on line.size.limit
 case class KllMergeAggBigint(
     child: Expression,
@@ -558,7 +558,7 @@ case class KllMergeAggBigint(
        6
   """,
   group = "agg_funcs",
-  since = "4.1.0")
+  since = "4.1.2")
 // scalastyle:on line.size.limit
 case class KllMergeAggFloat(
     child: Expression,
@@ -631,7 +631,7 @@ case class KllMergeAggFloat(
        6
   """,
   group = "agg_funcs",
-  since = "4.1.0")
+  since = "4.1.2")
 // scalastyle:on line.size.limit
 case class KllMergeAggDouble(
     child: Expression,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 080186431eb7d..05e3a0e3aa356 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -175,6 +175,13 @@ class CodegenContext extends Logging {
    */
   var currentVars: Seq[ExprCode] = null
 
+  /**
+   * Java expression used by leaf operators (e.g. `RangeExec`, `SampleExec`)
+   * in place of the stage's `partitionIndex` field. `UnionExec` overrides
+   * this per child so that fused children observe their child-local index.
+   */
+  var currentPartitionIndexVar: String = "partitionIndex"
+
   /**
    * Holding expressions' inlined mutable states like `MonotonicallyIncreasingID.count` as a
    * 2-tuple: java type, variable name.
@@ -1143,14 +1150,22 @@ class CodegenContext extends Logging {
    *      evaluation, we can look for generated subexpressions and do replacement.
    */
   def subexpressionEliminationForWholeStageCodegen(expressions: Seq[Expression]): SubExprCodes = {
-    // Create a clear EquivalentExpressions and SubExprEliminationState mapping
+    // Create a clear EquivalentExpressions and compute the common subexpressions.
     val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions
+    expressions.foreach(equivalentExpressions.addExprTree(_))
+    subexpressionEliminationForWholeStageCodegen(equivalentExpressions)
+  }
+
+  /**
+   * Same as above, but takes a pre-built [[EquivalentExpressions]]. A caller that has already
+   * analyzed the expressions (e.g. to decide whether any common subexpression exists) can reuse
+   * that analysis here instead of rebuilding it.
+   */
+  def subexpressionEliminationForWholeStageCodegen(
+      equivalentExpressions: EquivalentExpressions): SubExprCodes = {
     val localSubExprEliminationExprsForNonSplit =
       mutable.HashMap.empty[ExpressionEquals, SubExprEliminationState]
 
-    // Add each expression tree and compute the common subexpressions.
-    expressions.foreach(equivalentExpressions.addExprTree(_))
-
     // Get all the expressions that appear at least twice and set up the state for subexpression
     // elimination.
     val commonExprs = equivalentExpressions.getCommonSubexpressions
@@ -1519,8 +1534,7 @@ object CodeGenerator extends Logging {
       classOf[Platform].getName,
       classOf[InternalRow].getName,
       classOf[UnsafeRow].getName,
-      classOf[GeographyVal].getName,
-      classOf[GeometryVal].getName,
+      classOf[BinaryView].getName,
       classOf[UTF8String].getName,
       classOf[Decimal].getName,
       classOf[CalendarInterval].getName,
@@ -1685,8 +1699,7 @@ object CodeGenerator extends Logging {
       case _ => PhysicalDataType(dataType) match {
         case _: PhysicalArrayType => s"$input.getArray($ordinal)"
         case PhysicalBinaryType => s"$input.getBinary($ordinal)"
-        case _: PhysicalGeographyType => s"$input.getGeography($ordinal)"
-        case _: PhysicalGeometryType => s"$input.getGeometry($ordinal)"
+        case _: PhysicalBinaryViewType => s"$input.getBinaryView($ordinal)"
         case PhysicalCalendarIntervalType => s"$input.getInterval($ordinal)"
         case t: PhysicalDecimalType => s"$input.getDecimal($ordinal, ${t.precision}, ${t.scale})"
         case _: PhysicalMapType => s"$input.getMap($ordinal)"
@@ -1763,9 +1776,11 @@ object CodeGenerator extends Logging {
       case CalendarIntervalType => s"$row.setInterval($ordinal, $value)"
       case t: DecimalType => s"$row.setDecimal($ordinal, $value, ${t.precision})"
       case udt: UserDefinedType[_] => setColumn(row, udt.sqlType, ordinal, value)
-      // The UTF8String, InternalRow, ArrayData and MapData may came from UnsafeRow, we should copy
-      // it to avoid keeping a "pointer" to a memory region which may get updated afterwards.
-      case _: StringType | _: StructType | _: ArrayType | _: MapType =>
+      // The UTF8String, BinaryView, InternalRow, ArrayData and MapData may came from UnsafeRow, we
+      // should copy it to avoid keeping a "pointer" to a memory region which may get updated
+      // afterwards.
+      case _: StringType | _: GeometryType | _: GeographyType |
+           _: StructType | _: ArrayType | _: MapType =>
         s"$row.update($ordinal, $value.copy())"
       case _ => s"$row.update($ordinal, $value)"
     }
@@ -1965,8 +1980,7 @@ object CodeGenerator extends Logging {
    * Returns the Java type for a DataType.
    */
   def javaType(dt: DataType): String = dt match {
-    case _: GeographyType => "GeographyVal"
-    case _: GeometryType => "GeometryVal"
+    case _: GeographyType | _: GeometryType => "BinaryView"
     case udt: UserDefinedType[_] => javaType(udt.sqlType)
     case ObjectType(cls) if cls.isArray => s"${javaType(ObjectType(cls.getComponentType))}[]"
     case ObjectType(cls) => cls.getName
@@ -2004,8 +2018,7 @@ object CodeGenerator extends Logging {
     case DoubleType => java.lang.Double.TYPE
     case _: DecimalType => classOf[Decimal]
     case BinaryType => classOf[Array[Byte]]
-    case _: GeographyType => classOf[GeographyVal]
-    case _: GeometryType => classOf[GeometryVal]
+    case _: GeographyType | _: GeometryType => classOf[BinaryView]
     case _: StringType => classOf[UTF8String]
     case CalendarIntervalType => classOf[CalendarInterval]
     case _: StructType => classOf[InternalRow]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
index 9d7c2236678d1..c3db6fca6a861 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala
@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_COLLATION}
 import org.apache.spark.sql.catalyst.util.{AttributeNameParser, CollationFactory}
 import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.types.StringTypeWithCollation
 import org.apache.spark.sql.types._
 
@@ -54,10 +53,6 @@ object CollateExpressionBuilder extends ExpressionBuilder {
             if (evalCollation == null) {
               throw QueryCompilationErrors.unexpectedNullError("collation", collationExpr)
             } else {
-              if (!SQLConf.get.trimCollationEnabled &&
-                evalCollation.toString.toUpperCase().contains("TRIM")) {
-                throw QueryCompilationErrors.trimCollationNotEnabledError()
-              }
               Collate(e, UnresolvedCollation(
                 AttributeNameParser.parseAttributeName(evalCollation.toString)))
             }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 60966f3098ca8..cf66932d882ec 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -32,12 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.trees.{BinaryLike, UnaryLike}
-import org.apache.spark.sql.catalyst.trees.TreePattern.{
-  ARRAYS_ZIP,
-  CONCAT,
-  MAP_FROM_ENTRIES,
-  TreePattern
-}
+import org.apache.spark.sql.catalyst.trees.TreePattern.{ARRAY_DISTINCT, ARRAY_EXCEPT, ARRAY_INTERSECT, ARRAY_UNION, ARRAYS_OVERLAP, ARRAYS_ZIP, CONCAT, MAP_FROM_ENTRIES, TreePattern}
 import org.apache.spark.sql.catalyst.types.{DataTypeUtils, PhysicalDataType, PhysicalIntegralType}
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
@@ -1809,6 +1804,9 @@ case class ArrayAppend(left: Expression, right: Expression) extends ArrayPendBas
 // scalastyle:off line.size.limit
 case class ArraysOverlap(left: Expression, right: Expression)
   extends BinaryArrayExpressionWithImplicitCast with Predicate {
+
+  final override val nodePatterns: Seq[TreePattern] = Seq(ARRAYS_OVERLAP)
+
   override def nullIntolerant: Boolean = true
 
   override def checkInputDataTypes(): TypeCheckResult = super.checkInputDataTypes() match {
@@ -2310,9 +2308,16 @@ case class ArrayJoin(
         }
       }
     } else {
+      // When array and delimiter are both non-nullable, neither nullSafeExec wrapper above runs,
+      // so reset ev.isNull here. doGenCode initializes ev.isNull to true whenever the expression
+      // is nullable (e.g. a nullable nullReplacement), and without this reset the computed result
+      // would be discarded as NULL. When the expression is non-nullable, ev.isNull is a literal
+      // false and must not be assigned.
+      val resetIsNull = if (nullable) s"${ev.isNull} = false;" else ""
       s"""
          |${arrayGen.code}
          |${delimiterGen.code}
+         |$resetIsNull
          |$resultCode""".stripMargin
     }
   }
@@ -4212,6 +4217,9 @@ trait ArraySetLike {
   since = "2.4.0")
 case class ArrayDistinct(child: Expression)
   extends UnaryExpression with ArraySetLike with ExpectsInputTypes {
+
+  final override val nodePatterns: Seq[TreePattern] = Seq(ARRAY_DISTINCT)
+
   override def nullIntolerant: Boolean = true
   override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
 
@@ -4408,6 +4416,8 @@ trait ArrayBinaryLike
 case class ArrayUnion(left: Expression, right: Expression) extends ArrayBinaryLike
   with ComplexTypeMergingExpression {
 
+  final override val nodePatterns: Seq[TreePattern] = Seq(ARRAY_UNION)
+
   @transient lazy val evalUnion: (ArrayData, ArrayData) => ArrayData = {
     if (TypeUtils.typeWithProperEquals(elementType)) {
       (array1, array2) =>
@@ -4585,6 +4595,8 @@ case class ArrayUnion(left: Expression, right: Expression) extends ArrayBinaryLi
 case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBinaryLike
   with ComplexTypeMergingExpression {
 
+  final override val nodePatterns: Seq[TreePattern] = Seq(ARRAY_INTERSECT)
+
   private lazy val internalDataType: DataType = {
     dataTypeCheck
     ArrayType(elementType, leftArrayElementNullable && rightArrayElementNullable)
@@ -4800,7 +4812,7 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina
 }
 
 /**
- * Returns an array of the elements in the intersect of x and y, without duplicates
+ * Returns an array of the elements in x but not in y, without duplicates
  */
 @ExpressionDescription(
   usage = """
@@ -4817,6 +4829,8 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina
 case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryLike
   with ComplexTypeMergingExpression {
 
+  final override val nodePatterns: Seq[TreePattern] = Seq(ARRAY_EXCEPT)
+
   private lazy val internalDataType: DataType = {
     dataTypeCheck
     left.dataType
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index 8cc71381ddab7..ed72e4dc21018 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -52,6 +52,25 @@ object ExtractValue {
     }
   }
 
+  /**
+   * Resolution-time variant of [[apply]]: extracting a field/element/key from a NULL (`NullType`)
+   * base yields NULL (SQL NULL propagation) instead of throwing `INVALID_EXTRACT_BASE_FIELD_TYPE`.
+   * A `NullType` column can arise e.g. from schema evolution with missing columns. This is used by
+   * the user-facing extraction resolution sites (multipart name resolution and
+   * `UnresolvedExtractValue` resolution). `extractValue` itself is left unchanged, so the other
+   * direct consumers keep their prior (throwing) behavior.
+   */
+  def applyOrNull(
+      child: Expression,
+      extraction: Expression,
+      resolver: Resolver): Expression = {
+    if (child.dataType == NullType) {
+      Literal(null, NullType)
+    } else {
+      apply(child, extraction, resolver)
+    }
+  }
+
   /**
    * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`,
    * depend on the type of `child` and `extraction`.
@@ -119,13 +138,21 @@ object ExtractValue {
     val withExtractedNestedFields = nestedFields
       .foldLeft(Some(attribute): Option[Expression]) {
         case (Some(expression), field) =>
-          ExtractValue.extractValue(
-            child = expression,
-            extraction = Literal(field),
-            resolver = resolver
-          ) match {
-            case Left(e) => Some(e)
-            case Right(_) => None
+          // Extraction from a NULL (NullType) base propagates NULL rather than failing, matching
+          // the user-facing resolution sites (which use applyOrNull). Treating it as extractable
+          // here keeps the NullType candidate in single-pass NameScope candidate filtering so it
+          // resolves consistently with the legacy analyzer.
+          if (expression.dataType == NullType) {
+            Some(Literal(null, NullType))
+          } else {
+            ExtractValue.extractValue(
+              child = expression,
+              extraction = Literal(field),
+              resolver = resolver
+            ) match {
+              case Left(e) => Some(e)
+              case Right(_) => None
+            }
           }
         case _ =>
           None
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 226e098165b82..aa4ed692d5745 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -24,9 +24,11 @@ import java.util.Locale
 
 import org.apache.commons.text.StringEscapeUtils
 
-import org.apache.spark.{SparkDateTimeException, SparkIllegalArgumentException}
+import org.apache.spark.{SparkDateTimeException, SparkException, SparkIllegalArgumentException}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.Cast.{ordinalNumber, toSQLExpr, toSQLId, toSQLType, toSQLValue}
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
@@ -3897,3 +3899,199 @@ case class TimestampDiff(
     copy(startTimestamp = newLeft, endTimestamp = newRight)
   }
 }
+
+/**
+ * Aligns a timestamp to the start of a fixed-size interval bucket.
+ *
+ * Returns the start of the half-open bucket [start, start + bucketSize) containing ts.
+ * For TIMESTAMP_NTZ, bucketing is performed in UTC. For TIMESTAMP, year-month
+ * interval buckets and calendar-day components of day-time interval buckets align
+ * to the session time zone.
+ */
+case class TimeBucket(
+    bucketSize: Expression,
+    ts: Expression,
+    originTs: Expression,
+    timeZoneId: Option[String] = None)
+  extends TernaryExpression with ExpectsInputTypes with TimeZoneAwareExpression {
+
+  override def nullIntolerant: Boolean = true
+
+  override def first: Expression = bucketSize
+  override def second: Expression = ts
+  override def third: Expression = originTs
+
+  override def withTimeZone(timeZoneId: String): TimeBucket =
+    copy(timeZoneId = Option(timeZoneId))
+
+  @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(ts.dataType)
+
+  def this(bucketSize: Expression, ts: Expression, originTs: Expression) =
+    this(bucketSize, ts, originTs, None)
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(
+    TypeCollection(DayTimeIntervalType, YearMonthIntervalType),
+    AnyTimestampType,
+    AnyTimestampType)
+
+  override def dataType: DataType = ts.dataType
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    val defaultCheck = super.checkInputDataTypes()
+    if (defaultCheck.isFailure) return defaultCheck
+
+    if (!bucketSize.foldable) {
+      return DataTypeMismatch(
+        errorSubClass = "NON_FOLDABLE_INPUT",
+        messageParameters = Map(
+          "inputName" -> toSQLId("bucketSize"),
+          "inputType" -> toSQLType(bucketSize.dataType),
+          "inputExpr" -> toSQLExpr(bucketSize)))
+    }
+
+    val bucketSizeValue = bucketSize.eval()
+    if (bucketSizeValue != null) {
+      val isNonPositive = bucketSize.dataType match {
+        case _: DayTimeIntervalType => bucketSizeValue.asInstanceOf[Long] <= 0
+        case _: YearMonthIntervalType => bucketSizeValue.asInstanceOf[Int] <= 0
+        case other => throw SparkException.internalError(
+          s"Unexpected bucketSize type: $other")
+      }
+      if (isNonPositive) {
+        return DataTypeMismatch(
+          errorSubClass = "VALUE_OUT_OF_RANGE",
+          messageParameters = Map(
+            "exprName" -> toSQLId("bucketSize"),
+            "valueRange" -> "(0, inf)",
+            "currentValue" -> toSQLValue(bucketSizeValue, bucketSize.dataType)))
+      }
+    }
+
+    if (!originTs.foldable) {
+      return DataTypeMismatch(
+        errorSubClass = "NON_FOLDABLE_INPUT",
+        messageParameters = Map(
+          "inputName" -> toSQLId("origin"),
+          "inputType" -> toSQLType(originTs.dataType),
+          "inputExpr" -> toSQLExpr(originTs)))
+    }
+
+    if (ts.dataType != originTs.dataType) {
+      return DataTypeMismatch(
+        errorSubClass = "UNEXPECTED_INPUT_TYPE",
+        messageParameters = Map(
+          "paramIndex" -> ordinalNumber(2),
+          "requiredType" -> toSQLType(ts.dataType),
+          "inputSql" -> toSQLExpr(originTs),
+          "inputType" -> toSQLType(originTs.dataType)))
+    }
+
+    TypeCheckSuccess
+  }
+
+  override def nullSafeEval(bucketSizeVal: Any, tsVal: Any, originVal: Any): Any = {
+    first.dataType match {
+      case _: DayTimeIntervalType =>
+        DateTimeUtils.timeBucketDTInterval(
+          bucketSizeVal.asInstanceOf[Long], tsVal.asInstanceOf[Long],
+          originVal.asInstanceOf[Long], zoneIdInEval)
+      case _: YearMonthIntervalType =>
+        DateTimeUtils.timeBucketYMInterval(
+          bucketSizeVal.asInstanceOf[Int], tsVal.asInstanceOf[Long],
+          originVal.asInstanceOf[Long], zoneIdInEval)
+      case other => throw SparkException.internalError(
+        s"Unexpected bucketSize type: $other")
+    }
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
+    first.dataType match {
+      case _: DayTimeIntervalType =>
+        defineCodeGen(ctx, ev, (bucketSizeCode, tsCode, originCode) =>
+          s"$dtu.timeBucketDTInterval($bucketSizeCode, $tsCode, $originCode, $zid)")
+      case _: YearMonthIntervalType =>
+        defineCodeGen(ctx, ev, (bucketSizeCode, tsCode, originCode) =>
+          s"$dtu.timeBucketYMInterval($bucketSizeCode, $tsCode, $originCode, $zid)")
+      case other => throw SparkException.internalError(
+        s"Unexpected bucketSize type: $other")
+    }
+  }
+
+  override def prettyName: String = "time_bucket"
+
+  override protected def withNewChildrenInternal(
+      newFirst: Expression, newSecond: Expression, newThird: Expression): TimeBucket =
+    copy(bucketSize = newFirst, ts = newSecond, originTs = newThird)
+}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(bucketSize, ts[, origin]) - Returns the start of the bucket that `ts` falls into,
+      where buckets are defined by the given `bucketSize` interval aligned to optional `origin`.
+      For `TIMESTAMP_NTZ`, bucketing is performed in UTC. For `TIMESTAMP`, year-month
+      interval buckets and calendar-day components of day-time interval buckets align
+      to the session time zone.
+  """,
+  arguments = """
+    Arguments:
+      * bucketSize - A day-time or year-month interval defining the bucket size. Must be positive and foldable.
+      * ts - A TIMESTAMP or TIMESTAMP_NTZ value to bucket.
+      * origin - Optional TIMESTAMP or TIMESTAMP_NTZ alignment anchor. Defaults to 1970-01-01 00:00:00. Must be the same type as ts and must be foldable.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00');
+       2024-01-01 11:15:00
+      > SELECT _FUNC_(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00');
+       2024-01-01 11:00:00
+      > SELECT _FUNC_(INTERVAL '1' MONTH, TIMESTAMP '2024-07-20 14:30:00', TIMESTAMP '2024-06-15 09:00:00');
+       2024-07-15 09:00:00
+  """,
+  since = "4.2.0",
+  group = "datetime_funcs")
+// scalastyle:on line.size.limit
+object TimeBucketExpressionBuilder extends ExpressionBuilder {
+  private def retypeNull(e: Expression, dt: DataType): Expression = e match {
+    case Literal(null, NullType) => Literal(null, dt)
+    case _ => e
+  }
+
+  // Default origin: 1970-01-01 00:00:00 in the session time zone for TIMESTAMP, and
+  // EPOCH (1970-01-01 00:00:00 UTC) for TIMESTAMP_NTZ.
+  private def defaultOrigin(tsType: DataType): Literal = tsType match {
+    case TimestampType =>
+      val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)
+      Literal(DateTimeUtils.daysToMicros(0, zoneId), TimestampType)
+    case _ => Literal(0L, tsType)
+  }
+
+  override def build(funcName: String, expressions: Seq[Expression]): Expression = {
+    expressions match {
+      case Seq(rawBucketSize, rawTs) =>
+        val bucketSize = retypeNull(rawBucketSize, DayTimeIntervalType())
+        // Fall back to TimestampType for bad ts types; ExpectsInputTypes will report it.
+        val tsType = rawTs.dataType match {
+          case t if AnyTimestampType.acceptsType(t) => t
+          case _ => TimestampType
+        }
+        val ts = retypeNull(rawTs, tsType)
+        TimeBucket(bucketSize, ts, defaultOrigin(tsType))
+      case Seq(rawBucketSize, rawTs, rawOrigin) =>
+        val bucketSize = retypeNull(rawBucketSize, DayTimeIntervalType())
+        val tsType = (rawTs.dataType, rawOrigin.dataType) match {
+          case (NullType, t) if AnyTimestampType.acceptsType(t) => t
+          case (NullType, _) => TimestampType
+          case (t, _) => t
+        }
+        val ts = retypeNull(rawTs, tsType)
+        val originTs = retypeNull(rawOrigin, tsType)
+        TimeBucket(bucketSize, ts, originTs)
+      case _ =>
+        throw QueryCompilationErrors.wrongNumArgsError(
+          funcName, Seq(2, 3), expressions.length)
+    }
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 3b222ca05235a..c749c83138535 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -256,8 +256,7 @@ object Literal {
         case PhysicalNullType => true
         case PhysicalShortType => v.isInstanceOf[Short]
         case _: PhysicalStringType => v.isInstanceOf[UTF8String]
-        case _: PhysicalGeographyType => v.isInstanceOf[GeographyVal]
-        case _: PhysicalGeometryType => v.isInstanceOf[GeometryVal]
+        case _: PhysicalBinaryViewType => v.isInstanceOf[BinaryView]
         case PhysicalVariantType => v.isInstanceOf[VariantVal]
         case st: PhysicalStructType =>
           v.isInstanceOf[InternalRow] && {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index ccefdc0999ea8..d27f140d083b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -727,14 +727,20 @@ object FileSourceMetadataAttribute {
    *
    * The set of supported types is limited by [[ColumnVectorUtils.populate]], which the constant
    * file metadata implementation relies on. In general, types that can be partition columns are
-   * supported (including most primitive types). Notably unsupported types include [[ObjectType]],
-   * [[UserDefinedType]], and the complex types ([[StructType]], [[MapType]], [[ArrayType]]).
+   * supported (including most primitive types), plus the complex types [[ArrayType]],
+   * [[MapType]], and [[StructType]] (recursively, as long as their element types are supported).
+   * Notably unsupported types include [[ObjectType]] and [[UserDefinedType]].
    */
-  def isSupportedType(dataType: DataType): Boolean = PhysicalDataType(dataType) match {
-    // PhysicalPrimitiveType covers: Boolean, Byte, Double, Float, Integer, Long, Null, Short
-    case _: PhysicalPrimitiveType | _: PhysicalDecimalType => true
-    case PhysicalBinaryType | PhysicalStringType(_) | PhysicalCalendarIntervalType => true
-    case _ => false
+  def isSupportedType(dataType: DataType): Boolean = dataType match {
+    case ArrayType(elementType, _) => isSupportedType(elementType)
+    case MapType(keyType, valueType, _) => isSupportedType(keyType) && isSupportedType(valueType)
+    case st: StructType => st.fields.forall(f => isSupportedType(f.dataType))
+    case _ => PhysicalDataType(dataType) match {
+      // PhysicalPrimitiveType covers: Boolean, Byte, Double, Float, Integer, Long, Null, Short
+      case _: PhysicalPrimitiveType | _: PhysicalDecimalType => true
+      case PhysicalBinaryType | PhysicalStringType(_) | PhysicalCalendarIntervalType => true
+      case _ => false
+    }
   }
 
   /** Returns the type unchanged if valid; otherwise throws [[IllegalArgumentException]]. */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
index 1aa1d0b25e44c..e7be588c4b465 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala
@@ -145,6 +145,21 @@ case class Coalesce(children: Seq[Expression])
     copy(children = newChildren)
 }
 
+private case class TypedNullLiteral(child: Expression)
+    extends UnaryExpression with RuntimeReplaceable {
+  override def nullable: Boolean = true
+
+  override def dataType: DataType = child.dataType
+
+  override def toString: String = "null"
+
+  override def sql: String = "NULL"
+
+  override lazy val replacement: Expression = Literal.create(null, child.dataType)
+
+  override protected def withNewChildInternal(newChild: Expression): TypedNullLiteral =
+    copy(child = newChild)
+}
 
 @ExpressionDescription(
   usage = "_FUNC_(expr1, expr2) - Returns null if `expr1` equals to `expr2`, or `expr1` otherwise.",
@@ -162,10 +177,10 @@ case class NullIf(left: Expression, right: Expression, replacement: Expression)
     this(left, right,
       if (!SQLConf.get.getConf(SQLConf.ALWAYS_INLINE_COMMON_EXPR)) {
         With(left) { case Seq(ref) =>
-          If(EqualTo(ref, right), Literal.create(null, left.dataType), ref)
+          If(EqualTo(ref, right), TypedNullLiteral(ref), ref)
         }
       } else {
-        If(EqualTo(left, right), Literal.create(null, left.dataType), left)
+        If(EqualTo(left, right), TypedNullLiteral(left), left)
       }
     )
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 114a43c34c040..a3008a949ec07 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -396,7 +396,9 @@ package object expressions  {
           // Then this will add ExtractValue("c", ExtractValue("b", a)), and alias the final
           // expression as "c".
           val fieldExprs = nestedFields.foldLeft(a: Expression) { (e, name) =>
-            ExtractValue(e, Literal(name), resolver)
+            // applyOrNull propagates NULL when the base is NullType (e.g. a NullType column from
+            // schema evolution) instead of throwing INVALID_EXTRACT_BASE_FIELD_TYPE.
+            ExtractValue.applyOrNull(e, Literal(name), resolver)
           }
           Seq(Alias(fieldExprs, nestedFields.last)())
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index b8d6054fc6fc5..e721b6ccede2e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -45,8 +45,7 @@ trait BaseGenericInternalRow extends InternalRow {
   override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = getAs(ordinal)
   override def getUTF8String(ordinal: Int): UTF8String = getAs(ordinal)
   override def getBinary(ordinal: Int): Array[Byte] = getAs(ordinal)
-  override def getGeography(ordinal: Int): GeographyVal = getAs(ordinal)
-  override def getGeometry(ordinal: Int): GeometryVal = getAs(ordinal)
+  override def getBinaryView(ordinal: Int): BinaryView = getAs(ordinal)
   override def getArray(ordinal: Int): ArrayData = getAs(ordinal)
   override def getInterval(ordinal: Int): CalendarInterval = getAs(ordinal)
   override def getVariant(ordinal: Int): VariantVal = getAs(ordinal)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
index 986c0ef3a2ef4..fe774a62fcc6d 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._
 import org.apache.spark.sql.catalyst.trees._
 import org.apache.spark.sql.catalyst.util.{Geography, Geometry, STUtils}
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.types.StringTypeWithCollation
 import org.apache.spark.sql.types._
 
 /**
@@ -51,55 +52,72 @@ sealed trait GeospatialInputTypes extends ImplicitCastInputTypes {
 private[sql] object ExpressionDefaults {
   val DEFAULT_GEOGRAPHY_SRID: Int = Geography.DEFAULT_SRID
   val DEFAULT_GEOMETRY_SRID: Int = Geometry.DEFAULT_SRID
+  val DEFAULT_WKB_ENDIANNESS: String = "NDR"
 }
 
 /** ST writer expressions. */
 
 /**
- * Returns the input GEOGRAPHY or GEOMETRY value in WKB format.
+ * Returns the input GEOGRAPHY or GEOMETRY value in WKB format using the specified endianness, if
+ * provided. If no endianness is provided, it defaults to little endian.
  * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
  * for more details on the WKB format.
  */
 @ExpressionDescription(
-  usage = "_FUNC_(geo) - Returns the geospatial value (value of type GEOGRAPHY or GEOMETRY) "
-    + "in WKB format.",
+  usage = "_FUNC_(geo[, endianness]) - Returns the geospatial value (value of type GEOGRAPHY or "
+    + "GEOMETRY) in WKB format using the specified endianness ('NDR' for little-endian, 'XDR' for "
+    + "big-endian), if provided. Defaults to little-endian encoding.",
   arguments = """
     Arguments:
       * geo - A geospatial value, either a GEOGRAPHY or a GEOMETRY.
+      * endianness - The optional endianness of the output WKB, 'NDR' for little-endian (default)
+                     or 'XDR' for big-endian.
   """,
   examples = """
     Examples:
       > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')));
        0101000000000000000000F03F0000000000000040
-      > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040')));
-       0101000000000000000000F03F0000000000000040
+      > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'), 'XDR'));
+       00000000013FF00000000000004000000000000000
   """,
   since = "4.1.0",
   group = "st_funcs"
 )
-case class ST_AsBinary(geo: Expression)
+case class ST_AsBinary(geo: Expression, endianness: Expression)
     extends RuntimeReplaceable
     with GeospatialInputTypes
-    with UnaryLike[Expression] {
+    with BinaryLike[Expression] {
+
+  // If no endianness is given, default to little-endian encoding which is represented by "NDR".
+  def this(geo: Expression) = {
+    this(geo, Literal(ExpressionDefaults.DEFAULT_WKB_ENDIANNESS))
+  }
 
   override def inputTypes: Seq[AbstractDataType] = Seq(
-    TypeCollection(GeographyType, GeometryType)
+    TypeCollection(GeographyType, GeometryType),
+    StringTypeWithCollation(supportsTrimCollation = true)
   )
 
   override lazy val replacement: Expression = StaticInvoke(
     classOf[STUtils],
     BinaryType,
-    "stAsBinary",
-    Seq(geo),
+    geo.dataType match {
+      case _: GeographyType => "stGeogAsBinary"
+      case _: GeometryType => "stGeomAsBinary"
+    },
+    Seq(geo, endianness),
     returnNullable = false
   )
 
   override def prettyName: String = "st_asbinary"
 
-  override def child: Expression = geo
+  override def left: Expression = geo
 
-  override protected def withNewChildInternal(newChild: Expression): ST_AsBinary =
-    copy(geo = newChild)
+  override def right: Expression = endianness
+
+  override protected def withNewChildrenInternal(
+      newLeft: Expression,
+      newRight: Expression): ST_AsBinary = copy(geo = newLeft, endianness = newRight)
 }
 
 /** ST reader expressions. */
@@ -245,7 +263,10 @@ case class ST_Srid(geo: Expression)
   override lazy val replacement: Expression = StaticInvoke(
     classOf[STUtils],
     IntegerType,
-    "stSrid",
+    geo.dataType match {
+      case _: GeographyType => "stGeogSrid"
+      case _: GeometryType => "stGeomSrid"
+    },
     Seq(geo),
     returnNullable = false
   )
@@ -295,7 +316,10 @@ case class ST_SetSrid(geo: Expression, srid: Expression)
   override lazy val replacement: Expression = StaticInvoke(
     classOf[STUtils],
     STExpressionUtils.geospatialTypeWithSrid(geo.dataType, srid),
-    "stSetSrid",
+    geo.dataType match {
+      case _: GeographyType => "stGeogSetSrid"
+      case _: GeometryType => "stGeomSetSrid"
+    },
     Seq(geo, srid),
     returnNullable = false
   )
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala
index 61e51988c1e67..b32064ce78c16 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala
@@ -34,7 +34,8 @@ object VariantExpressionEvalUtils {
   def parseJson(
       input: UTF8String,
       allowDuplicateKeys: Boolean = false,
-      failOnError: Boolean = true): VariantVal = {
+      failOnError: Boolean = true,
+      validateUnicodeInJsonParsing: Boolean = true): VariantVal = {
     def parseJsonFailure(exception: Throwable): VariantVal = {
       if (failOnError) {
         throw exception
@@ -43,7 +44,8 @@ object VariantExpressionEvalUtils {
       }
     }
     try {
-      val v = VariantBuilder.parseJson(input.toString, allowDuplicateKeys)
+      val v = VariantBuilder.parseJson(
+        input.toString, allowDuplicateKeys, validateUnicodeInJsonParsing)
       new VariantVal(v.getValue, v.getMetadata)
     } catch {
       case _: VariantSizeLimitException =>
@@ -70,6 +72,9 @@ object VariantExpressionEvalUtils {
     }
   }
 
+  def isValidVariant(input: VariantVal): Boolean =
+    VariantUtil.isValidVariant(input.getValue, input.getMetadata)
+
   /** Cast a Spark value from `dataType` into the variant type. */
   def castToVariant(input: Any, dataType: DataType): VariantVal = {
     // Enforce strict check because it is illegal for input struct/map/variant to contain duplicate
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala
index a7e19dfb1948d..5d78f11bf86f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala
@@ -62,8 +62,9 @@ case class ParseJson(child: Expression, failOnError: Boolean = true)
     Seq(
       child,
       Literal(SQLConf.get.getConf(SQLConf.VARIANT_ALLOW_DUPLICATE_KEYS), BooleanType),
-      Literal(failOnError, BooleanType)),
-    inputTypes :+ BooleanType :+ BooleanType,
+      Literal(failOnError, BooleanType),
+      Literal(SQLConf.get.getConf(SQLConf.VARIANT_VALIDATE_UNICODE_IN_JSON_PARSING), BooleanType)),
+    inputTypes :+ BooleanType :+ BooleanType :+ BooleanType,
     returnNullable = !failOnError)
 
   override def inputTypes: Seq[AbstractDataType] =
@@ -952,3 +953,35 @@ case class SchemaOfVariantAgg(
   override protected def withNewChildInternal(newChild: Expression): Expression =
     copy(child = newChild)
 }
+
+@ExpressionDescription(
+  usage = "_FUNC_(v) - Returns true if the variant is valid, false if it is malformed, " +
+    "NULL if `v` is NULL.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(parse_json('null'));
+       true
+      > SELECT _FUNC_(parse_json('[{"b":true,"a":0}]'));
+       true
+  """,
+  since = "4.2.0",
+  group = "variant_funcs"
+)
+case class IsValidVariant(child: Expression) extends UnaryExpression
+  with Predicate with ExpectsInputTypes with RuntimeReplaceable {
+
+  override lazy val replacement: Expression = StaticInvoke(
+    VariantExpressionEvalUtils.getClass,
+    BooleanType,
+    "isValidVariant",
+    Seq(child),
+    inputTypes,
+    returnNullable = false)
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(VariantType)
+
+  override def prettyName: String = "is_valid_variant"
+
+  override protected def withNewChildInternal(newChild: Expression): IsValidVariant =
+    copy(child = newChild)
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 797355bed8312..fa03a9aea833a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -122,6 +122,8 @@ class JacksonParser(
   }
 
   private val variantAllowDuplicateKeys = SQLConf.get.getConf(SQLConf.VARIANT_ALLOW_DUPLICATE_KEYS)
+  private val variantValidateUnicodeInJsonParsing =
+    SQLConf.get.getConf(SQLConf.VARIANT_VALIDATE_UNICODE_IN_JSON_PARSING)
 
   protected final def parseVariant(parser: JsonParser): VariantVal = {
     // Skips `FIELD_NAME` at the beginning. This check is adapted from `parseJsonToken`, but we
@@ -131,7 +133,8 @@ class JacksonParser(
       parser.nextToken()
     }
     try {
-      val v = VariantBuilder.parseJson(parser, variantAllowDuplicateKeys)
+      val v = VariantBuilder.parseJson(
+        parser, variantAllowDuplicateKeys, variantValidateUnicodeInJsonParsing)
       new VariantVal(v.getValue, v.getMetadata)
     } catch {
       case _: VariantSizeLimitException =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/normalizer/NormalizeCTEIds.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/normalizer/NormalizeCTEIds.scala
index 6c0bca0e1104f..660f11d368ab8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/normalizer/NormalizeCTEIds.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/normalizer/NormalizeCTEIds.scala
@@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicLong
 
 import scala.collection.mutable
 
+import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
 import org.apache.spark.sql.catalyst.plans.logical.{CacheTableAsSelect, CTERelationRef, LogicalPlan, UnionLoop, UnionLoopRef, WithCTE}
 import org.apache.spark.sql.catalyst.rules.Rule
 
@@ -52,16 +53,29 @@ object NormalizeCTEIds extends Rule[LogicalPlan] {
 
   private def canonicalizeCTE(
       plan: LogicalPlan,
-      defIdToNewId: mutable.Map[Long, Long]): LogicalPlan = {
-    plan.transformDownWithSubqueries {
-      // For nested WithCTE, if defIndex didn't contain the cteId,
-      // means it's not current WithCTE's ref.
-      case ref: CTERelationRef if defIdToNewId.contains(ref.cteId) =>
-        ref.copy(cteId = defIdToNewId(ref.cteId))
-      case unionLoop: UnionLoop if defIdToNewId.contains(unionLoop.id) =>
-        unionLoop.copy(id = defIdToNewId(unionLoop.id))
-      case unionLoopRef: UnionLoopRef if defIdToNewId.contains(unionLoopRef.loopId) =>
-        unionLoopRef.copy(loopId = defIdToNewId(unionLoopRef.loopId))
-    }
+      defIdToNewId: mutable.Map[Long, Long]): LogicalPlan = plan match {
+    // Stop at nested WithCTEs because applyInternal canonicalizes each WithCTE scope
+    // independently. Descending here would re-apply the shared cteIdToNewId map to
+    // inner-scope refs and, under sibling WithCTEs, move them to the wrong CTE
+    // definition (SPARK-56921).
+    case _: WithCTE => plan
+    case other =>
+      val normalizedPlan = other match {
+        case ref: CTERelationRef if defIdToNewId.contains(ref.cteId) =>
+          ref.copy(cteId = defIdToNewId(ref.cteId))
+        case unionLoop: UnionLoop if defIdToNewId.contains(unionLoop.id) =>
+          unionLoop.copy(id = defIdToNewId(unionLoop.id))
+        case unionLoopRef: UnionLoopRef if defIdToNewId.contains(unionLoopRef.loopId) =>
+          unionLoopRef.copy(loopId = defIdToNewId(unionLoopRef.loopId))
+        case _ =>
+          other
+      }
+
+      normalizedPlan
+        .withNewChildren(normalizedPlan.children.map(canonicalizeCTE(_, defIdToNewId)))
+        .transformExpressionsDown {
+          case subqueryExpression: SubqueryExpression =>
+            subqueryExpression.withNewPlan(canonicalizeCTE(subqueryExpression.plan, defIdToNewId))
+        }
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
index 776efbed273e3..7d5fd9fe57913 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.SparkException
-import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, TransformValues, UnaryExpression}
+import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayDistinct, ArrayExcept, ArrayIntersect, ArraysOverlap, ArrayTransform, ArrayUnion, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, TransformValues, UnaryExpression}
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
 import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Window}
@@ -31,59 +31,82 @@ import org.apache.spark.util.ArrayImplicits._
  * We need to take care of special floating numbers (NaN and -0.0) in several places:
  *   1. When compare values, different NaNs should be treated as same, `-0.0` and `0.0` should be
  *      treated as same.
- *   2. In aggregate grouping keys, different NaNs should belong to the same group, -0.0 and 0.0
+ *   2. In aggregate grouping keys, different NaNs should belong to the same group, `-0.0` and `0.0`
  *      should belong to the same group.
  *   3. In join keys, different NaNs should be treated as same, `-0.0` and `0.0` should be
  *      treated as same.
- *   4. In window partition keys, different NaNs should belong to the same partition, -0.0 and 0.0
- *      should belong to the same partition.
+ *   4. In window partition keys, different NaNs should belong to the same partition, `-0.0`
+ *      and `0.0` should belong to the same partition.
+ *   5. In hash-based array set operations, different NaNs should be treated as same, `-0.0`
+ *      and `0.0` should be treated as same.
  *
- * Case 1 is fine, as we handle NaN and -0.0 well during comparison. For complex types, we
+ * Case 1 is fine, as we handle NaN and `-0.0` well during comparison. For complex types, we
  * recursively compare the fields/elements, so it's also fine.
  *
  * Case 2, 3 and 4 are problematic, as Spark SQL turns grouping/join/window partition keys into
  * binary `UnsafeRow` and compare the binary data directly. Different NaNs have different binary
- * representation, and the same thing happens for -0.0 and 0.0.
+ * representation, and the same thing happens for `-0.0` and `0.0`.
  *
- * This rule normalizes NaN and -0.0 in window partition keys, join keys and aggregate grouping
- * keys.
+ * Case 5 is problematic for a similar reason: hash-based array set operations compare elements by
+ * their binary representation via hash sets.
+ *
+ * This rule runs in two places:
+ *    1. Early in `FinishAnalysis` (right after `ReplaceExpressions` and before `EvalInlineTables`)
+ *    so that array set-like operations are wrapped before optimizer rules that pre-evaluate
+ *    expressions (e.g. `ConstantFolding`, `ConvertToLocalRelation`, `EvalInlineTables`).
+ *
+ *    2. As a late batch at the end of the optimizer, because rules like subquery rewrite and
+ *    join reorder can create new joins or join conditions after `FinishAnalysis` that still
+ *    need their keys to be normalized.
  *
  * Ideally we should do the normalization in the physical operators that compare the
  * binary `UnsafeRow` directly. We don't need this normalization if the Spark SQL execution engine
  * is not optimized to run on binary data. This rule is created to simplify the implementation, so
  * that we have a single place to do normalization, which is more maintainable.
  *
- * Note that, this rule must be executed at the end of optimizer, because the optimizer may create
- * new joins(the subquery rewrite) and new join conditions(the join reorder).
  */
 object NormalizeFloatingNumbers extends Rule[LogicalPlan] {
 
-  def apply(plan: LogicalPlan): LogicalPlan = plan match {
-    case _ => plan.transformWithPruning( _.containsAnyPattern(WINDOW, JOIN)) {
-      case w: Window if w.partitionSpec.exists(p => needNormalize(p)) =>
-        // Although the `windowExpressions` may refer to `partitionSpec` expressions, we don't need
-        // to normalize the `windowExpressions`, as they are executed per input row and should take
-        // the input row as it is.
-        w.copy(partitionSpec = w.partitionSpec.map(normalize))
-
-      // Only hash join and sort merge join need the normalization. Here we catch all Joins with
-      // join keys, assuming Joins with join keys are always planned as hash join or sort merge
-      // join. It's very unlikely that we will break this assumption in the near future.
-      case j @ ExtractEquiJoinKeys(_, leftKeys, rightKeys, condition, _, _, _, _)
-          // The analyzer guarantees left and right joins keys are of the same data type. Here we
-          // only need to check join keys of one side.
-          if leftKeys.exists(k => needNormalize(k)) =>
-        val newLeftJoinKeys = leftKeys.map(normalize)
-        val newRightJoinKeys = rightKeys.map(normalize)
-        val newConditions = newLeftJoinKeys.zip(newRightJoinKeys).map {
-          case (l, r) => EqualTo(l, r)
-        } ++ condition
-        j.copy(condition = Some(newConditions.reduce(And)))
-
-      // TODO: ideally Aggregate should also be handled here, but its grouping expressions are
-      // mixed in its aggregate expressions. It's unreliable to change the grouping expressions
-      // here. For now we normalize grouping expressions in `AggUtils` during planning.
-    }
+  def apply(plan: LogicalPlan): LogicalPlan = {
+    plan
+      .transformWithPruning( _.containsAnyPattern(WINDOW, JOIN)) {
+        case w: Window if w.partitionSpec.exists(p => needNormalize(p)) =>
+          // Although the `windowExpressions` may refer to `partitionSpec` expressions,
+          // we don't need to normalize the `windowExpressions`, as they are executed
+          // per input row and should take the input row as it is.
+          w.copy(partitionSpec = w.partitionSpec.map(normalize))
+
+        // Only hash join and sort merge join need the normalization. Here we catch all Joins with
+        // join keys, assuming Joins with join keys are always planned as hash join or sort merge
+        // join. It's very unlikely that we will break this assumption in the near future.
+        case j @ ExtractEquiJoinKeys(_, leftKeys, rightKeys, condition, _, _, _, _)
+            // The analyzer guarantees left and right joins keys are of the same data type. Here we
+            // only need to check join keys of one side.
+            if leftKeys.exists(k => needNormalize(k)) =>
+          val newLeftJoinKeys = leftKeys.map(normalize)
+          val newRightJoinKeys = rightKeys.map(normalize)
+          val newConditions = newLeftJoinKeys.zip(newRightJoinKeys).map {
+            case (l, r) => EqualTo(l, r)
+          } ++ condition
+          j.copy(condition = Some(newConditions.reduce(And)))
+
+        // TODO: ideally Aggregate should also be handled here, but its grouping expressions are
+        // mixed in its aggregate expressions. It's unreliable to change the grouping expressions
+        // here. For now we normalize grouping expressions in `AggUtils` during planning.
+      }
+      .transformAllExpressionsWithPruning(_.containsAnyPattern(
+        ARRAY_DISTINCT, ARRAY_UNION, ARRAY_INTERSECT, ARRAY_EXCEPT, ARRAYS_OVERLAP)) {
+        case e: ArrayDistinct if needNormalize(e.child) =>
+          e.copy(child = normalize(e.child))
+        case e: ArrayUnion if needNormalize(e.left) =>
+          e.copy(left = normalize(e.left), right = normalize(e.right))
+        case e: ArrayIntersect if needNormalize(e.left) =>
+          e.copy(left = normalize(e.left), right = normalize(e.right))
+        case e: ArrayExcept if needNormalize(e.left) =>
+          e.copy(left = normalize(e.left), right = normalize(e.right))
+        case e: ArraysOverlap if needNormalize(e.left) =>
+          e.copy(left = normalize(e.left), right = normalize(e.right))
+      }
   }
 
   /**
@@ -94,7 +117,7 @@ object NormalizeFloatingNumbers extends Rule[LogicalPlan] {
     case _ => needNormalize(expr.dataType)
   }
 
-  private def needNormalize(dt: DataType): Boolean = dt match {
+  private[sql] def needNormalize(dt: DataType): Boolean = dt match {
     case FloatType | DoubleType => true
     case StructType(fields) => fields.exists(f => needNormalize(f.dataType))
     case ArrayType(et, _) => needNormalize(et)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 25e7479d8897a..3bef1658c803b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -214,13 +214,17 @@ abstract class Optimizer(catalogManager: CatalogManager)
       OptimizeSubqueries,
       OptimizeOneRowRelationSubquery),
     Batch("Replace Operators", fixedPoint,
+      // SPARK-51262: ReplaceDeduplicateWithAggregate must run before RewriteExceptAll because
+      // it replaces Deduplicate with Aggregate(First(...)), creating new attribute exprIds.
+      // If RewriteExceptAll runs first, its Generate node captures stale exprIds that no
+      // longer exist after the Deduplicate-to-Aggregate rewrite.
+      ReplaceDeduplicateWithAggregate,
       RewriteExceptAll,
       RewriteIntersectAll,
       ReplaceIntersectWithSemiJoin,
       ReplaceExceptWithFilter,
       ReplaceExceptWithAntiJoin,
-      ReplaceDistinctWithAggregate,
-      ReplaceDeduplicateWithAggregate),
+      ReplaceDistinctWithAggregate),
     Batch("Aggregate", fixedPoint,
       RemoveLiteralFromGroupExpressions,
       RemoveRepetitionFromGroupExpressions),
@@ -328,6 +332,7 @@ abstract class Optimizer(catalogManager: CatalogManager)
       EliminateView,
       EliminateSQLFunctionNode,
       ReplaceExpressions,
+      NormalizeFloatingNumbers,
       RewriteNonCorrelatedExists,
       PullOutGroupingExpressions,
       // Put `InsertMapSortInGroupingExpressions` after `PullOutGroupingExpressions`,
@@ -339,6 +344,7 @@ abstract class Optimizer(catalogManager: CatalogManager)
       ReplaceCurrentLike(catalogManager),
       SpecialDatetimeValues,
       RewriteAsOfJoin,
+      RewriteNearestByJoin,
       EvalInlineTables,
       ReplaceTranspose,
       RewriteCollationJoin
@@ -1295,7 +1301,7 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper {
         limit.copy(child = p2.copy(projectList = newProjectList))
       case Project(l1, r @ Repartition(_, _, p @ Project(l2, _))) if isRenaming(l1, l2) =>
         r.copy(child = p.copy(projectList = buildCleanedProjectList(l1, p.projectList)))
-      case Project(l1, s @ Sample(_, _, _, _, p2 @ Project(l2, _))) if isRenaming(l1, l2) =>
+      case Project(l1, s @ Sample(_, _, _, _, p2 @ Project(l2, _), _)) if isRenaming(l1, l2) =>
         s.copy(child = p2.copy(projectList = buildCleanedProjectList(l1, p2.projectList)))
       case o => o
     }
@@ -2544,10 +2550,10 @@ object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper {
   def apply(plan: LogicalPlan): LogicalPlan =
     if (conf.crossJoinEnabled) {
       plan
-    } else plan.transformWithPruning(_.containsAnyPattern(INNER_LIKE_JOIN, OUTER_JOIN))  {
+    } else plan.transformWithPruning(_.containsAnyPattern(INNER_LIKE_JOIN, OUTER_JOIN)) {
       case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _, _)
-        if isCartesianProduct(j) =>
-          throw QueryCompilationErrors.joinConditionMissingOrTrivialError(j, left, right)
+          if isCartesianProduct(j) =>
+        throw QueryCompilationErrors.joinConditionMissingOrTrivialError(j, left, right)
     }
 }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PlanMerger.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PlanMerger.scala
index a85bed783de6e..1c43f91cee9dc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PlanMerger.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PlanMerger.scala
@@ -21,6 +21,7 @@ import scala.collection.mutable
 
 import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeMap, Expression, If, Literal, NamedExpression, Or}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
+import org.apache.spark.sql.catalyst.plans.{Cross, Inner, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter}
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Join, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.internal.SQLConf
@@ -85,12 +86,24 @@ object PlanMerger {
  * When `filterPropagationEnabled` is true, non-grouping [[Aggregate]]s over the same base plan
  * with different [[Filter]] conditions can also be merged. The filter conditions are exposed as
  * boolean [[Project]] attributes and consumed at the [[Aggregate]] as FILTER clauses.
- * When both sides carry a [[Filter]] (the symmetric case), merging broadens the scan to
- * OR(f1, f2), which may reduce IO pruning. This path is separately gated by
+ * When both sides carry a [[Filter]] (the symmetric case), merging broadens the scan to OR(f1, f2),
+ * which may reduce IO pruning. This path is separately gated by
  * `symmetricFilterPropagationEnabled`.
  * When plans also differ in intermediate [[Project]] expressions, those are wrapped with
- * `If(filterAttr, expr, null)` to avoid computing the expression for rows that do not
- * match that side's filter condition.
+ * `If(filterAttr, expr, null)` to avoid computing the expression for rows that do not match that
+ * side's filter condition.
+ * Filter propagation also works through [[Join]] nodes: a filter on one child of the join produces
+ * a boolean attribute that flows through the join output to the enclosing [[Aggregate]].
+ * Propagation is only safe when the filter originates from the non-nullable side of the join, as
+ * enforced by `filterSafeForJoin`. When the filter is on the nullable side, the merged base plan
+ * restores rows that were filtered out of the nullable child, turning what were unmatched
+ * NULL-padded rows in the original plan into matched rows with real column values. This changes the
+ * result of expressions like `coalesce(col, default)` in the aggregate: an originally unmatched row
+ * would have contributed `default` via `coalesce(NULL, default)`, but in the merged plan it is
+ * matched, its real column value fails the filter, and `FILTER (WHERE false)` discards it entirely.
+ * Propagation is also skipped when both the left and right children simultaneously produce filter
+ * attributes, as combining them would require an additional AND alias above the join (not yet
+ * supported).
  *
  * {{{
  *   // Input plans
@@ -120,7 +133,9 @@ class PlanMerger(
     filterPropagationEnabled: Boolean =
       SQLConf.get.getConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_ENABLED),
     symmetricFilterPropagationEnabled: Boolean =
-      SQLConf.get.getConf(SQLConf.MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED)) {
+      SQLConf.get.getConf(SQLConf.MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED),
+    filterPropagationThroughJoinEnabled: Boolean =
+      SQLConf.get.getConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED)) {
   val cache = mutable.ArrayBuffer.empty[MergedPlan]
 
   /**
@@ -146,27 +161,27 @@ class PlanMerger(
           // `ReusedSubqueryExec` rule can handle them without extracting the plans to CTEs.
           // But, when a non-subquery subplan is identical to a cached plan we need to mark the plan
           // `merged` and so extract it to a CTE later.
-          val newMergedPlan = MergedPlan(mp.plan, cache(i).merged || !subqueryPlan)
+          val newMergedPlan = MergedPlan(mp.plan, mp.merged || !subqueryPlan)
           cache(i) = newMergedPlan
           val outputMap = AttributeMap(plan.output.zipWithIndex)
           MergeResult(newMergedPlan, i, outputMap)
         }.orElse {
           tryMergePlans(plan, mp.plan, false).collect {
             case TryMergeResult(mergedPlan, npMapping, None, None) =>
-              val newMergePlan = MergedPlan(mergedPlan, true)
-              cache(i) = newMergePlan
+              val newMergedPlan = MergedPlan(mergedPlan, true)
+              cache(i) = newMergedPlan
               val outputMap = AttributeMap(npMapping.iterator.map { case (origAttr, mergedAttr) =>
                 origAttr -> mergedPlan.output.indexWhere(_.exprId == mergedAttr.exprId)
               }.toSeq)
-              MergeResult(newMergePlan, i, outputMap)
+              MergeResult(newMergedPlan, i, outputMap)
           }
         }
       case _ => None
     }).getOrElse {
-      val newMergePlan = MergedPlan(plan, false)
-      cache += newMergePlan
+      val newMergedPlan = MergedPlan(plan, false)
+      cache += newMergedPlan
       val outputMap = AttributeMap(plan.output.zipWithIndex)
-      MergeResult(newMergePlan, cache.length - 1, outputMap)
+      MergeResult(newMergedPlan, cache.length - 1, outputMap)
     }
   }
 
@@ -224,7 +239,8 @@ class PlanMerger(
    * - Aggregate nodes: Combines aggregate expressions if grouping is identical and both
    *   support the same aggregate implementation (hash/object-hash/sort-based)
    * - Filter nodes: Only if filter conditions are identical
-   * - Join nodes: Only if join type, hints, and conditions are identical
+   * - Join nodes: Requires identical join type, hints, and conditions; filter propagation is
+   *   forwarded into the join's children so a filter difference on one child can still be merged
    *
    * @param newPlan The plan to merge into the cached plan.
    * @param cachedPlan The cached plan to merge with.
@@ -339,7 +355,8 @@ class PlanMerger(
                   }
                   existingNPFilter match {
                     case Some(reusedFilter) =>
-                      Some(TryMergeResult(cp, npMapping, Some((reusedFilter, false)), None))
+                      val newFilter = cp.withNewChildren(Seq(mergedChild))
+                      Some(TryMergeResult(newFilter, npMapping, Some((reusedFilter, false)), None))
                     case None =>
                       val newNPFilterAlias =
                         Alias(newNPCondition, s"propagatedFilter_${PlanMerger.newId}")()
@@ -394,7 +411,7 @@ class PlanMerger(
                 Alias(newNPCondition, s"propagatedFilter_${PlanMerger.newId}")()
               val newNPFilter = newNPFilterAlias.toAttribute
               val project = Project(
-                mergedChild.output.toList ++ Seq(newNPFilterAlias) ++ cpFilter.toSeq,
+                mergedChild.output.toList :+ newNPFilterAlias,
                 mergedChild)
               TryMergeResult(project, npMapping, Some((newNPFilter, true)), cpFilter)
           }
@@ -405,29 +422,59 @@ class PlanMerger(
             // symmetricFilterPropagationEnabled.
             case TryMergeResult(mergedChild, npMapping, npFilter, cpFilter)
                 if npFilter.isEmpty || symmetricFilterPropagationEnabled =>
-              val newCPCondition = cpFilter.fold(cp.condition)(And(_, cp.condition))
-              val newCPFilterAlias =
-                Alias(newCPCondition, s"propagatedFilter_${PlanMerger.newId}")()
-              val newCPFilter = newCPFilterAlias.toAttribute
-              val project = Project(
-                mergedChild.output.toList ++ npFilter.map(_._1).toSeq ++ Seq(newCPFilterAlias),
-                mergedChild)
-              TryMergeResult(project, npMapping, npFilter, Some(newCPFilter))
+              if (cp.getTagValue(PlanMerger.MERGED_FILTER_TAG).isDefined) {
+                // cp is a previously-merged Filter: its condition is `OR(pf_0, pf_1, ...)` and cp's
+                // aggregate expressions already carry individual `FILTER (WHERE pf_i)` clauses that
+                // restrict each aggregation to its originating side. Synthesising a new cpFilter
+                // alias for cp.condition would just produce `FILTER AND(OR(pf_0, pf_1, ...), pf_i)`
+                // upstream, which simplifies to `FILTER pf_i` -- wasted work and plan bloat.
+                // Drop cp's Filter and let the recursion's result flow up with cpFilter = None so
+                // cp's aggregates are left untouched.
+                TryMergeResult(mergedChild, npMapping, npFilter, None)
+              } else {
+                val newCPCondition = cpFilter.fold(cp.condition)(And(_, cp.condition))
+                val newCPFilterAlias =
+                  Alias(newCPCondition, s"propagatedFilter_${PlanMerger.newId}")()
+                val newCPFilter = newCPFilterAlias.toAttribute
+                val project = Project(
+                  mergedChild.output.toList :+ newCPFilterAlias,
+                  mergedChild)
+                TryMergeResult(project, npMapping, npFilter, Some(newCPFilter))
+              }
           }
 
         case (np: Join, cp: Join) if np.joinType == cp.joinType && np.hint == cp.hint =>
-          // Filter propagation across joins is not yet supported.
-          tryMergePlans(np.left, cp.left, false).flatMap {
-            case TryMergeResult(mergedLeft, leftNPMapping, None, None) =>
-              tryMergePlans(np.right, cp.right, false).flatMap {
-                case TryMergeResult(mergedRight, rightNPMapping, None, None) =>
+          tryMergePlans(np.left, cp.left, filterPropagationSupported).flatMap {
+            case TryMergeResult(mergedLeft, leftNPMapping, leftNPFilter, leftCPFilter) =>
+              tryMergePlans(np.right, cp.right, filterPropagationSupported).flatMap {
+                case TryMergeResult(mergedRight, rightNPMapping, rightNPFilter, rightCPFilter)
+                    // If both children independently propagate filter attributes we would need to
+                    // AND them into a new alias above the join, which is not yet supported.
+                    if !(leftNPFilter.isDefined && rightNPFilter.isDefined) &&
+                       !(leftCPFilter.isDefined && rightCPFilter.isDefined) &&
+                       // Gate join-crossing filter propagation behind its own config flag.
+                       // When no filter attributes are in play the merge is unconditionally safe.
+                       (leftNPFilter.isEmpty && leftCPFilter.isEmpty &&
+                           rightNPFilter.isEmpty && rightCPFilter.isEmpty ||
+                           filterPropagationThroughJoinEnabled) &&
+                       // A filter attribute is only safe to propagate through a join if it comes
+                       // from the "preserved" (non-nullable) side. On the nullable side, unmatched
+                       // rows are NULL-padded so f=NULL, causing FILTER (WHERE f) to incorrectly
+                       // exclude rows that should contribute to the aggregate. Right-side
+                       // attributes are also absent from semi/anti join output.
+                       (leftNPFilter.isEmpty && leftCPFilter.isEmpty  ||
+                           filterSafeForJoin(fromLeft = true, cp.joinType)) &&
+                       (rightNPFilter.isEmpty && rightCPFilter.isEmpty ||
+                           filterSafeForJoin(fromLeft = false, cp.joinType)) =>
                   val npMapping = leftNPMapping ++ rightNPMapping
                   val mappedNPCondition = np.condition.map(mapAttributes(_, npMapping))
                   // Comparing the canonicalized form is required to ignore different forms of the
                   // same expression and `AttributeReference.qualifier`s in `cp.condition`.
                   if (mappedNPCondition.map(_.canonicalized) == cp.condition.map(_.canonicalized)) {
-                    val mergedPlan = cp.withNewChildren(Seq(mergedLeft, mergedRight))
-                    Some(TryMergeResult(mergedPlan, npMapping))
+                    val npFilter = leftNPFilter.orElse(rightNPFilter)
+                    val cpFilter = leftCPFilter.orElse(rightCPFilter)
+                    Some(TryMergeResult(cp.withNewChildren(Seq(mergedLeft, mergedRight)), npMapping,
+                      npFilter, cpFilter))
                   } else {
                     None
                   }
@@ -441,6 +488,35 @@ class PlanMerger(
       })
   }
 
+  // Returns true when a filter attribute originating from `fromLeft` child of a join with
+  // `joinType` can be safely propagated through that join to a parent Aggregate.
+  //
+  // Two conditions must both hold:
+  //   1. The attribute is in the join's output (rules out the right side of LeftSemi/LeftAnti).
+  //   2. The filter must originate from the non-nullable ("preserved") side of the join.
+  //      When a filter is on the nullable side, the merged base plan no longer applies it to the
+  //      nullable child's scan, so rows that were previously absent from that child reappear as
+  //      matched join rows instead of unmatched NULL-padded rows. This changes aggregate
+  //      expressions that use the NULL-padded column: e.g. for `sum(coalesce(col, default))`, an
+  //      originally unmatched row would have contributed `default` via `coalesce(NULL, default)`,
+  //      but in the merged plan the row is now matched with its real column value, fails the
+  //      filter, and FILTER (WHERE false) discards it -- losing the `default` contribution
+  //      entirely.
+  private def filterSafeForJoin(fromLeft: Boolean, joinType: JoinType): Boolean =
+    if (fromLeft) {
+      // Left side is never NULL-padded in: Inner, LeftOuter, LeftSemi, LeftAnti, Cross.
+      joinType match {
+        case Inner | LeftOuter | LeftSemi | LeftAnti | Cross => true
+        case _ => false  // RightOuter and FullOuter can NULL-pad the left side
+      }
+    } else {
+      // Right side is never NULL-padded AND is in the join output in: Inner, RightOuter, Cross.
+      joinType match {
+        case Inner | RightOuter | Cross => true
+        case _ => false  // LeftOuter/FullOuter can NULL-pad right; LeftSemi/LeftAnti drop right
+      }
+    }
+
   private def mapAttributes[T <: Expression](expr: T, outputMap: AttributeMap[Attribute]) = {
     expr.transform {
       case a: Attribute => outputMap.getOrElse(a, a)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
index 9a676571d1071..3d14bffb2fb92 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
@@ -60,7 +60,10 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] {
       val newCond = replaceNullWithFalse(cond)
       val newGroupFilterCond = groupFilterCond.map(replaceNullWithFalse)
       rd.copy(condition = newCond, groupFilterCondition = newGroupFilterCond)
-    case wd @ WriteDelta(_, cond, _, _, _, _) => wd.copy(condition = replaceNullWithFalse(cond))
+    case wd @ WriteDelta(_, cond, _, _, _, groupFilterCond, _) =>
+      val newCond = replaceNullWithFalse(cond)
+      val newGroupFilterCond = groupFilterCond.map(replaceNullWithFalse)
+      wd.copy(condition = newCond, groupFilterCondition = newGroupFilterCond)
     case d @ DeleteFromTable(_, cond) => d.copy(condition = replaceNullWithFalse(cond))
     case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond)))
     case m: MergeIntoTable =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteNearestByJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteNearestByJoin.scala
new file mode 100644
index 0000000000000..ee8e61b457fa8
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteNearestByJoin.scala
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+
+/**
+ * Replaces a logical [[NearestByJoin]] operator with a `Generate(Inline(...))` over an
+ * `Aggregate` that tags each left row with a unique id, cross-joins with the right side, and
+ * groups by the unique id to compute the top-K matches via `MAX_BY`/`MIN_BY` (K-overload).
+ *
+ * Input Pseudo-Query:
+ * {{{
+ *    SELECT * FROM left [INNER | LEFT OUTER] JOIN right
+ *      {APPROX | EXACT} NEAREST k BY {DISTANCE | SIMILARITY} expr
+ * }}}
+ *
+ * Rewritten Plan (SIMILARITY, INNER join type):
+ * {{{
+ *    Project [left.*, right.*]
+ *      +- Generate inline(_matches), [N], outer=false, [right.col1, right.col2, ...]
+ *         +- Aggregate [__qid],
+ *              [first(left.col0) AS left.col0, ..., first(left.colN-1) AS left.colN-1,
+ *               max_by(struct(right.*), expr, k) AS _matches]
+ *             +- Join Inner   // or LeftOuter for `LEFT OUTER NEAREST BY`
+ *                :- Project [left.*, uuid() AS __qid]
+ *                :  +- left
+ *                +- right
+ * }}}
+ *
+ * For `DISTANCE`, `MIN_BY` is used instead of `MAX_BY`. For `LEFT OUTER`, the `Generate` is
+ * constructed with `outer = true` so left rows with no matches (empty/null `_matches`) are
+ * preserved with `NULL` right-side columns.
+ *
+ * The matches in `_matches` are produced by `MaxMinByK` ordered by the ranking value: best
+ * match first (largest ranking value for `SIMILARITY`, smallest for `DISTANCE`). `Inline`
+ * preserves array order, so the K rows emitted per left row appear best-first in the output
+ * of this rule. (Downstream operators may reorder.)
+ *
+ * If `rankingExpression` is nondeterministic, an extra `Project` is inserted above the `Join`
+ * to materialize the value as `__ranking__`. The standard projection machinery runs
+ * `Nondeterministic.initialize(partitionIndex)` on every nondeterministic descendant before any
+ * value is evaluated, so `MaxMinByK` only ever sees a plain `AttributeReference` and never
+ * evaluates a nondeterministic expression directly.
+ *
+ * Unlike [[RewriteAsOfJoin]], which uses a correlated scalar subquery, this rule materializes
+ * the cross product directly. A scalar subquery returns a single value per left row, so it
+ * cannot carry K matches without an array-valued subquery + `Generate(Inline(...))` -- which
+ * collapses back to the same cross product after decorrelation. The aggregate-then-inline form
+ * makes the intended shape explicit and avoids round-tripping through subquery decorrelation.
+ */
+object RewriteNearestByJoin extends Rule[LogicalPlan] {
+  private lazy val random = new scala.util.Random()
+
+  def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp {
+    case j @ NearestByJoin(left, right, joinType, _, numResults, rankingExpression, direction) =>
+      // 1. Tag each left row with a unique id so that rows from the same left row can later be
+      //    grouped together after the cross-join with `right`.
+      val qidAlias = Alias(Uuid(Some(random.nextLong())), "__qid")()
+      val taggedLeft = Project(left.output :+ qidAlias, left)
+      val qidAttr = qidAlias.toAttribute
+
+      // 2. Join the tagged left with right (no join condition), using the user's join type.
+      //    For `LEFT OUTER`, left rows with no right-side match are preserved with `NULL`
+      //    right-side columns through the aggregate + inline below; for `INNER`, such rows
+      //    are dropped. When `right` is non-empty every left row already has right-row
+      //    pairings, so `LEFT OUTER` and `INNER` are equivalent in that case.
+      //
+      //    This synthetic join is an unconditioned cross-product, so `NEAREST BY` queries
+      //    are subject to `CheckCartesianProducts` and will be rejected when the user has
+      //    set `spark.sql.crossJoin.enabled = false`. That is intentional: if the user has
+      //    opted out of cross-products, the NEAREST BY rewrite -- which is itself a bounded
+      //    cross-product today -- should not silently bypass that choice.
+      val join = Join(taggedLeft, right, joinType, None, JoinHint.NONE)
+
+      val (aggInput, rankingForAgg) = if (!rankingExpression.deterministic) {
+        val rankingAlias = Alias(rankingExpression, "__ranking__")()
+        Project(join.output :+ rankingAlias, join) -> rankingAlias.toAttribute
+      } else {
+        join -> rankingExpression
+      }
+
+      // 4. Aggregate grouped by `__qid`:
+      //      - first(col) for every left column so it flows to the output.
+      //      - max_by/min_by(struct(right.*), ranking, k) as `_matches`.
+      //    The ranking expression references left and right columns directly; no outer
+      //    reference is needed because both sides are present in the joined input.
+      val rightStruct = CreateStruct(right.output)
+      // reverse = true  -> MIN_BY (smallest ranking value first, for DISTANCE)
+      // reverse = false -> MAX_BY (largest ranking value first, for SIMILARITY)
+      val reverse = direction match {
+        case NearestByDistance => true
+        case NearestBySimilarity => false
+      }
+      val topK = MaxMinByK(
+        rightStruct,
+        rankingForAgg,
+        Literal(numResults),
+        reverse = reverse).toAggregateExpression()
+      val matchesAlias = Alias(topK, "__nearest_matches__")()
+
+      // Carry left columns through with `First`. Within a `__qid` group every row has the same
+      // left values (each group corresponds to one left row), so `First` is effectively a no-op.
+      // We use `First` rather than adding all left columns to the GROUP BY because grouping by
+      // `__qid` alone keeps the shuffle key small.
+      val firstLeftAggs = left.output.map { attr =>
+        Alias(
+          First(attr, ignoreNulls = false).toAggregateExpression(),
+          attr.name)(exprId = attr.exprId, qualifier = attr.qualifier)
+      }
+      val aggregate = Aggregate(Seq(qidAttr), firstLeftAggs :+ matchesAlias, aggInput)
+
+      // 4. Generate inline(_matches) expands the K-element array into K rows, exposing each
+      //    struct field as a top-level column. `outer = true` for LEFT OUTER preserves the
+      //    left row with NULL right columns when there are no matches. Preserving the right
+      //    side's `ExprId`s in `generatorOutput` (rather than allocating fresh ones) keeps
+      //    `generate.output` byte-for-byte equivalent to `j.output` -- which already used
+      //    those ExprIds with `nullable = true` -- so parent-operator references continue to
+      //    resolve naturally and the rule can use plain `transformUp` without an attrMapping.
+      val generatorOutput = right.output.map { a =>
+        AttributeReference(a.name, a.dataType, nullable = true, a.metadata)(
+          exprId = a.exprId, qualifier = a.qualifier)
+      }
+      val generate = Generate(
+        Inline(matchesAlias.toAttribute),
+        unrequiredChildIndex = Seq(aggregate.output.indexOf(matchesAlias.toAttribute)),
+        outer = joinType == LeftOuter,
+        qualifier = None,
+        generatorOutput = generatorOutput,
+        child = aggregate)
+
+      // 5. Final `Project` pinning the output schema to `NearestByJoin.output`.
+      Project(j.output, generate)
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala
index be07b440a118b..13e3cb76805d8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala
@@ -427,7 +427,7 @@ trait JoinSelectionHelper extends Logging {
       getBroadcastBuildSide(join, hintOnly = true, conf).isDefined ||
         (noShufflePlannedBefore &&
           getBroadcastBuildSide(join, hintOnly = false, conf).isDefined)
-    case ExtractSingleColumnNullAwareAntiJoin(_, _) => true
+    case j @ ExtractSingleColumnNullAwareAntiJoin(_, _) => canBroadcastBySize(j.right, conf)
     case _ => false
   }
 
@@ -560,4 +560,3 @@ trait JoinSelectionHelper extends Logging {
       conf.getConfString("spark.sql.join.forceApplyShuffledHashJoin", "false") == "true"
   }
 }
-
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 378081221c8c1..bf3e63571dbbc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -71,10 +71,15 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper {
       outerPlan: LogicalPlan,
       sub: LogicalPlan,
       joinCond: Option[Expression]): LogicalPlan = {
-    if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn) {
+    if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn && joinCond.isDefined) {
       DecorrelateInnerQuery.rewriteDomainJoins(outerPlan,
         sub, splitConjunctivePredicates(joinCond.get))
     } else {
+      // After PullupCorrelatedPredicates, BooleanSimplification can eliminate every
+      // correlated predicate in the subquery (e.g., FALSE AND outer.x = inner.x -> FALSE),
+      // leaving joinCond as None. In this case, no DomainJoin exists in sub because both
+      // joinCond and DomainJoins are produced by the same DecorrelateInnerQuery call --
+      // if there's no join condition, there are no domain joins to rewrite.
       sub
     }
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
index 216136d8a7c82..29bf924f244e8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin
 import org.apache.spark.sql.catalyst.plans.logical.{CompoundPlanStatement, LogicalPlan}
 import org.apache.spark.sql.catalyst.trees.Origin
+import org.apache.spark.sql.connector.catalog.PathElement
 import org.apache.spark.sql.errors.QueryParsingErrors
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
@@ -110,6 +111,18 @@ abstract class AbstractSqlParser extends AbstractParser with ParserInterface {
     }
   }
 
+  /**
+   * Parse the right-hand side of `SET PATH = ...` (a comma-separated list of path elements).
+   * Used by [[org.apache.spark.sql.connector.catalog.CatalogManager]] to honor the
+   * [[SQLConf.DEFAULT_PATH]] conf without re-implementing the SET PATH grammar.
+   */
+  private[sql] def parsePathElements(sqlText: String): Seq[PathElement] = parse(sqlText) { parser =>
+    val ctx = parser.singlePathElementList()
+    withErrorHandling(ctx, Some(sqlText)) {
+      astBuilder.visitSinglePathElementList(ctx)
+    }
+  }
+
   def withErrorHandling[T](ctx: ParserRuleContext, sqlText: Option[String])(toResult: => T): T = {
     withOrigin(ctx, sqlText) {
       try {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 527f2b80314df..e6de6a1c9bc6c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -47,7 +47,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, EvaluateUnresolvedInlineTable, IntervalUtils}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampWithoutTimeZone}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, SupportsNamespaces, TableCatalog, TableWritePrivilege}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogContext, PathElement, SupportsNamespaces, TableCatalog, TableWritePrivilege}
 import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
@@ -708,6 +708,26 @@ class AstBuilder extends DataTypeAstBuilder
     visitMultipartIdentifier(ctx.multipartIdentifier)
   }
 
+  override def visitSinglePathElementList(
+      ctx: SinglePathElementListContext): Seq[PathElement] = withOrigin(ctx) {
+    ctx.pathElement().asScala.map(visitPathElement).toSeq
+  }
+
+  override def visitPathElement(ctx: PathElementContext): PathElement = withOrigin(ctx) {
+    if (ctx.DEFAULT_PATH() != null) PathElement.DefaultPath
+    else if (ctx.SYSTEM_PATH() != null) PathElement.SystemPath
+    else if (ctx.PATH() != null) PathElement.PathRef
+    else if (ctx.CURRENT_DATABASE() != null || ctx.CURRENT_SCHEMA() != null) {
+      PathElement.CurrentSchema
+    } else {
+      val parts = visitMultipartIdentifier(ctx.multipartIdentifier())
+      if (parts.length < 2) {
+        throw QueryCompilationErrors.invalidSqlPathSchemaReferenceError(parts.mkString("."))
+      }
+      PathElement.SchemaInPath(parts)
+    }
+  }
+
   override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) {
     typedVisit[DataType](ctx.dataType)
   }
@@ -913,32 +933,31 @@ class AstBuilder extends DataTypeAstBuilder
       query: LogicalPlan,
       queryAliasCtx: TableAliasContext): LogicalPlan = withOrigin(ctx) {
     ctx match {
-      // We cannot push withIdentClause() into the write command because:
-      //   1. `PlanWithUnresolvedIdentifier` is not a NamedRelation
-      //   2. Write commands do not hold the table logical plan as a child, and we need to add
-      //      additional resolution code to resolve identifiers inside the write commands.
+      // For all `InsertIntoStatement` / `OverwriteByExpression`-producing branches, build the
+      // `table` slot directly via `buildWriteTableSlot` so that any
+      // `PlanWithUnresolvedIdentifier` lives *inside* the command's identifier slot. This
+      // preserves the `CTEInChildren` shape and lets `CTESubstitution` place `WithCTE` on the
+      // command's children correctly (SPARK-46625).
       case table: InsertIntoTableContext =>
         val insertParams = visitInsertIntoTable(table)
-        withIdentClause(insertParams.relationCtx, Seq(query), (ident, otherPlans) => {
-          createInsertIntoStatement(
-            insertParams = insertParams,
-            ident = ident,
-            query = otherPlans.head,
-            overwrite = false,
-            writePrivileges = Set(TableWritePrivilege.INSERT),
-            withSchemaEvolution = table.EVOLUTION() != null)
-        })
+        val privileges = Set(TableWritePrivilege.INSERT)
+        createInsertIntoStatement(
+          insertParams = insertParams,
+          tableSlot = buildWriteTableSlot(
+            insertParams.relationCtx, insertParams.options, privileges),
+          query = query,
+          overwrite = false,
+          withSchemaEvolution = table.EVOLUTION() != null)
       case table: InsertOverwriteTableContext =>
         val insertParams = visitInsertOverwriteTable(table)
-        withIdentClause(insertParams.relationCtx, Seq(query), (ident, otherPlans) => {
-          createInsertIntoStatement(
-            insertParams = insertParams,
-            ident = ident,
-            query = otherPlans.head,
-            overwrite = true,
-            writePrivileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE),
-            withSchemaEvolution = table.EVOLUTION() != null)
-        })
+        val privileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE)
+        createInsertIntoStatement(
+          insertParams = insertParams,
+          tableSlot = buildWriteTableSlot(
+            insertParams.relationCtx, insertParams.options, privileges),
+          query = query,
+          overwrite = true,
+          withSchemaEvolution = table.EVOLUTION() != null)
       case ctx: InsertIntoReplaceBooleanCondContext =>
         // Although REPLACE WHERE and REPLACE ON share a unified grammar rule, they have
         // different SQL semantics:
@@ -948,63 +967,66 @@ class AstBuilder extends DataTypeAstBuilder
         // while REPLACE WHERE still can.
         val isInsertReplaceWhere = ctx.WHERE() != null
         if (isInsertReplaceWhere) {
+          // The unified grammar rule for REPLACE WHERE | ON accepts a table alias for
+          // symmetry with REPLACE ON (whose condition can reference the target via the
+          // alias, e.g. `t.col`). The REPLACE WHERE branch has no use for the alias
+          // because the WHERE condition is evaluated against the target table directly.
+          // Reject explicitly so users get a clear parse error instead of a confusing
+          // column-not-found at analysis time.
+          if (ctx.tableAlias() != null && ctx.tableAlias().strictIdentifier() != null) {
+            throw QueryParsingErrors.insertReplaceWhereTableAliasNotAllowed(ctx.tableAlias())
+          }
           val options = Option(ctx.optionsClause())
-          withIdentClause(ctx.identifierReference, Seq(query), (ident, otherPlans) => {
-            val table = createUnresolvedRelation(
-              ctx = ctx.identifierReference,
-              ident = ident,
-              optionsClause = options,
-              writePrivileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE),
-              isStreaming = false)
-            val deleteExpr = expression(ctx.replaceCondition)
-            val isByName = ctx.NAME() != null
-            if (isByName) {
-              OverwriteByExpression.byName(
-                table,
-                df = otherPlans.head,
-                deleteExpr,
-                withSchemaEvolution = ctx.EVOLUTION() != null)
-            } else {
-              OverwriteByExpression.byPosition(
-                table,
-                query = otherPlans.head,
-                deleteExpr,
-                withSchemaEvolution = ctx.EVOLUTION() != null)
-            }
-          })
-        } else {
-          val insertParams = visitInsertIntoReplaceOn(ctx)
-          withIdentClause(insertParams.relationCtx, Seq(query), (ident, otherPlans) => {
-            val query = {
-              val queryAliasOpt =
-                getTableAliasWithoutColumnAlias(queryAliasCtx, "INSERT REPLACE ON")
-
-              queryAliasOpt.map { queryAlias =>
-                withOrigin(queryAliasCtx) {
-                  SubqueryAlias(queryAlias, child = otherPlans.head)
-                }
-              }.getOrElse(otherPlans.head)
-            }
-            createInsertIntoStatement(
-              insertParams = insertParams,
-              ident = ident,
+          val privileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE)
+          // `PlanWithUnresolvedIdentifier` is a `NamedRelation`, so it can occupy
+          // `OverwriteByExpression.table` directly; the materialization happens in
+          // `ResolveIdentifierClause` via its `OverwriteByExpression` special-case.
+          val table = buildWriteTableSlot(ctx.identifierReference, options, privileges)
+          val deleteExpr = expression(ctx.replaceCondition)
+          val isByName = ctx.NAME() != null
+          if (isByName) {
+            OverwriteByExpression.byName(
+              table,
+              df = query,
+              deleteExpr,
+              withSchemaEvolution = ctx.EVOLUTION() != null)
+          } else {
+            OverwriteByExpression.byPosition(
+              table,
               query = query,
-              overwrite = true,
-              writePrivileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE),
+              deleteExpr,
               withSchemaEvolution = ctx.EVOLUTION() != null)
-          })
-        }
-      case ctx: InsertIntoReplaceUsingContext =>
-        val insertParams = visitInsertIntoReplaceUsing(ctx)
-        withIdentClause(insertParams.relationCtx, Seq(query), (ident, otherPlans) => {
+          }
+        } else {
+          val insertParams = visitInsertIntoReplaceOn(ctx)
+          val privileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE)
+          val finalQuery = {
+            val queryAliasOpt =
+              getTableAliasWithoutColumnAlias(queryAliasCtx, "INSERT REPLACE ON")
+            queryAliasOpt.map { queryAlias =>
+              withOrigin(queryAliasCtx) {
+                SubqueryAlias(queryAlias, child = query)
+              }
+            }.getOrElse(query)
+          }
           createInsertIntoStatement(
             insertParams = insertParams,
-            ident = ident,
-            query = otherPlans.head,
+            tableSlot = buildWriteTableSlot(
+              insertParams.relationCtx, insertParams.options, privileges),
+            query = finalQuery,
             overwrite = true,
-            writePrivileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE),
             withSchemaEvolution = ctx.EVOLUTION() != null)
-        })
+        }
+      case ctx: InsertIntoReplaceUsingContext =>
+        val insertParams = visitInsertIntoReplaceUsing(ctx)
+        val privileges = Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE)
+        createInsertIntoStatement(
+          insertParams = insertParams,
+          tableSlot = buildWriteTableSlot(
+            insertParams.relationCtx, insertParams.options, privileges),
+          query = query,
+          overwrite = true,
+          withSchemaEvolution = ctx.EVOLUTION() != null)
       case dir: InsertOverwriteDirContext =>
         val (isLocal, storage, provider) = visitInsertOverwriteDir(dir)
         InsertIntoDir(isLocal, storage, provider, query, overwrite = true)
@@ -1133,18 +1155,12 @@ class AstBuilder extends DataTypeAstBuilder
    */
   private def createInsertIntoStatement(
       insertParams: InsertTableParams,
-      ident: Seq[String],
+      tableSlot: LogicalPlan,
       query: LogicalPlan,
       overwrite: Boolean,
-      writePrivileges: Set[TableWritePrivilege],
       withSchemaEvolution: Boolean): InsertIntoStatement = {
     InsertIntoStatement(
-      table = createUnresolvedRelation(
-        ctx = insertParams.relationCtx,
-        ident = ident,
-        optionsClause = insertParams.options,
-        writePrivileges = writePrivileges,
-        isStreaming = false),
+      table = tableSlot,
       partitionSpec = insertParams.partitionSpec,
       userSpecifiedCols = insertParams.userSpecifiedCols,
       query = query,
@@ -1155,6 +1171,27 @@ class AstBuilder extends DataTypeAstBuilder
       withSchemaEvolution = withSchemaEvolution)
   }
 
+  /**
+   * Build the `table` slot of a write command. If the identifier reference is a constant string,
+   * returns an [[UnresolvedRelation]] directly; otherwise returns a
+   * [[PlanWithUnresolvedIdentifier]] that materializes into an [[UnresolvedRelation]] once the
+   * identifier expression is resolved. Both branches produce a [[NamedRelation]], so the result
+   * fits `NamedRelation`-typed slots (e.g. `OverwriteByExpression.table`) as well as the more
+   * general `LogicalPlan` slot of `InsertIntoStatement.table`.
+   *
+   * Placing the placeholder in the identifier slot (rather than wrapping the entire write command)
+   * preserves the `CTEInChildren` shape at parse time, so `CTESubstitution` places `WithCTE` on the
+   * command's children correctly. See SPARK-46625.
+   */
+  private def buildWriteTableSlot(
+      ctx: IdentifierReferenceContext,
+      optionsClause: Option[OptionsClauseContext],
+      writePrivileges: Set[TableWritePrivilege]): NamedRelation = {
+    withIdentClause(ctx, parts =>
+      createUnresolvedRelation(ctx, parts, optionsClause, writePrivileges, isStreaming = false))
+      .asInstanceOf[NamedRelation]
+  }
+
   /**
    * Write to a directory, returning a [[InsertIntoDir]] logical plan.
    */
@@ -2362,43 +2399,85 @@ class AstBuilder extends DataTypeAstBuilder
         }
       }
 
-      // Resolve the join type and join condition
-      val (joinType, condition) = Option(ctx.joinCriteria) match {
-        case Some(c) if c.USING != null =>
-          if (ctx.LATERAL != null) {
-            throw QueryParsingErrors.lateralJoinWithUsingJoinUnsupportedError(ctx)
-          }
-          (UsingJoin(baseJoinType, visitIdentifierList(c.identifierList)), None)
-        case Some(c) if c.booleanExpression != null =>
-          (baseJoinType, Option(expression(c.booleanExpression)))
-        case Some(c) =>
-          throw SparkException.internalError(s"Unimplemented joinCriteria: $c")
-        case None if ctx.NATURAL != null =>
-          if (ctx.LATERAL != null) {
-            throw QueryParsingErrors.incompatibleJoinTypesError(
-              joinType1 = ctx.LATERAL.toString, joinType2 = ctx.NATURAL.toString, ctx = ctx
-            )
-          }
-          if (baseJoinType == Cross) {
-            throw QueryParsingErrors.incompatibleJoinTypesError(
-              joinType1 = ctx.NATURAL.toString, joinType2 = baseJoinType.toString, ctx = ctx
-            )
+      if (ctx.nearestByClause != null) {
+        withNearestByJoin(ctx, base, baseJoinType)
+      } else {
+        // Resolve the join type and join condition
+        val (joinType, condition) = Option(ctx.joinCriteria) match {
+          case Some(c) if c.USING != null =>
+            if (ctx.LATERAL != null) {
+              throw QueryParsingErrors.lateralJoinWithUsingJoinUnsupportedError(ctx)
+            }
+            (UsingJoin(baseJoinType, visitIdentifierList(c.identifierList)), None)
+          case Some(c) if c.booleanExpression != null =>
+            (baseJoinType, Option(expression(c.booleanExpression)))
+          case Some(c) =>
+            throw SparkException.internalError(s"Unimplemented joinCriteria: $c")
+          case None if ctx.NATURAL != null =>
+            if (ctx.LATERAL != null) {
+              throw QueryParsingErrors.incompatibleJoinTypesError(
+                joinType1 = ctx.LATERAL.toString, joinType2 = ctx.NATURAL.toString, ctx = ctx
+              )
+            }
+            if (baseJoinType == Cross) {
+              throw QueryParsingErrors.incompatibleJoinTypesError(
+                joinType1 = ctx.NATURAL.toString, joinType2 = baseJoinType.toString, ctx = ctx
+              )
+            }
+            (NaturalJoin(baseJoinType), None)
+          case None =>
+            (baseJoinType, None)
+        }
+        if (ctx.LATERAL != null) {
+          if (!Seq(Inner, Cross, LeftOuter).contains(joinType)) {
+            throw QueryParsingErrors.unsupportedLateralJoinTypeError(ctx, joinType.sql)
           }
-          (NaturalJoin(baseJoinType), None)
-        case None =>
-          (baseJoinType, None)
-      }
-      if (ctx.LATERAL != null) {
-        if (!Seq(Inner, Cross, LeftOuter).contains(joinType)) {
-          throw QueryParsingErrors.unsupportedLateralJoinTypeError(ctx, joinType.sql)
+          LateralJoin(base, LateralSubquery(plan(ctx.right)), joinType, condition)
+        } else {
+          Join(base, plan(ctx.right), joinType, condition, JoinHint.NONE)
         }
-        LateralJoin(base, LateralSubquery(plan(ctx.right)), joinType, condition)
-      } else {
-        Join(base, plan(ctx.right), joinType, condition, JoinHint.NONE)
       }
     }
   }
 
+  /**
+   * Build a [[NearestByJoin]] from the parsed `NEAREST BY` clause attached to a join relation.
+   * Validates that the clause is not combined with `LATERAL` and that the base join type is one
+   * of the supported types (`INNER` or `LEFT OUTER`), parses `num_results` (with bounds checks),
+   * the direction (`DISTANCE` / `SIMILARITY`), and the ranking expression.
+   */
+  private def withNearestByJoin(
+      ctx: JoinRelationContext,
+      base: LogicalPlan,
+      baseJoinType: JoinType): NearestByJoin = {
+    if (ctx.LATERAL != null) {
+      throw QueryParsingErrors.nearestByJoinWithLateralUnsupportedError(ctx)
+    }
+    if (!Seq(Inner, LeftOuter).contains(baseJoinType)) {
+      throw QueryParsingErrors.unsupportedNearestByJoinTypeError(
+        ctx, baseJoinType.sql, NearestByJoinType.supportedDisplay)
+    }
+    val clause = ctx.nearestByClause
+    val approx = clause.APPROX != null
+    val numResults = Option(clause.num).map { n =>
+      // Guard against literals that overflow Long.
+      val value = try n.getText.toLong catch {
+        case _: NumberFormatException =>
+          throw QueryParsingErrors.nearestByJoinNumResultsOutOfRangeError(
+            ctx, n.getText, NearestByJoin.MaxNumResults)
+      }
+      if (value < 1 || value > NearestByJoin.MaxNumResults) {
+        throw QueryParsingErrors.nearestByJoinNumResultsOutOfRangeError(
+          ctx, value.toString, NearestByJoin.MaxNumResults)
+      }
+      value.toInt
+    }.getOrElse(1)
+    val direction = if (clause.DISTANCE != null) NearestByDistance else NearestBySimilarity
+    val rankingExpr = expression(clause.expression)
+    NearestByJoin(
+      base, plan(ctx.right), baseJoinType, approx, numResults, rankingExpr, direction)
+  }
+
   /**
    * Add a [[Sample]] to a logical plan.
    *
@@ -2406,10 +2485,14 @@ class AstBuilder extends DataTypeAstBuilder
    * - TABLESAMPLE(x ROWS): Sample the table down to the given number of rows.
    * - TABLESAMPLE(x PERCENT) [REPEATABLE (y)]: Sample the table down to the given percentage with
    * seed 'y'. Note that percentages are defined as a number between 0 and 100.
+   * - TABLESAMPLE SYSTEM(x PERCENT): Sample by data-source-dependent blocks or file splits.
    * - TABLESAMPLE(BUCKET x OUT OF y) [REPEATABLE (z)]: Sample the table down to a 'x' divided by
    * 'y' fraction with seed 'z'.
    */
   private def withSample(ctx: SampleContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    val isSystem = ctx.sampleType != null &&
+      ctx.sampleType.getType == SqlBaseParser.SYSTEM
+
     // Create a sampled plan if we need one.
     def sample(fraction: Double, seed: Option[Long]): Sample = {
       // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
@@ -2419,17 +2502,25 @@ class AstBuilder extends DataTypeAstBuilder
       validate(fraction >= 0.0 - eps && fraction <= 1.0 + eps,
         s"Sampling fraction ($fraction) must be on interval [0, 1]",
         ctx)
-      Sample(0.0, fraction, withReplacement = false, seed, query)
+      val method = if (isSystem) SampleMethod.System else SampleMethod.Bernoulli
+      Sample(0.0, fraction, withReplacement = false, seed, query, method)
     }
 
     if (ctx.sampleMethod() == null) {
       throw QueryParsingErrors.emptyInputForTableSampleError(ctx)
     }
 
+    if (isSystem && ctx.seed != null) {
+      throw QueryParsingErrors.tableSampleSystemRepeatableError(ctx)
+    }
+
     val seed: Option[Long] = Option(ctx.seed).map(_.getText.toLong)
 
     ctx.sampleMethod() match {
       case ctx: SampleByRowsContext =>
+        if (isSystem) {
+          throw QueryParsingErrors.tableSampleSystemSampleMethodError("ROWS", ctx)
+        }
         Limit(expression(ctx.expression), query)
 
       case ctx: SampleByPercentileContext =>
@@ -2441,6 +2532,9 @@ class AstBuilder extends DataTypeAstBuilder
         sample(sign * fraction / 100.0d, seed)
 
       case ctx: SampleByBytesContext =>
+        if (isSystem) {
+          throw QueryParsingErrors.tableSampleSystemSampleMethodError("BYTES", ctx)
+        }
         val bytesStr = ctx.bytes.getText
         if (bytesStr.matches("[0-9]+[bBkKmMgG]")) {
           throw QueryParsingErrors.tableSampleByBytesUnsupportedError("byteLengthLiteral", ctx)
@@ -2449,6 +2543,9 @@ class AstBuilder extends DataTypeAstBuilder
         }
 
       case ctx: SampleByBucketContext if ctx.ON() != null =>
+        if (isSystem) {
+          throw QueryParsingErrors.tableSampleSystemSampleMethodError("BUCKET", ctx)
+        }
         if (ctx.identifier != null) {
           throw QueryParsingErrors.tableSampleByBytesUnsupportedError(
             "BUCKET x OUT OF y ON colname", ctx)
@@ -2458,6 +2555,9 @@ class AstBuilder extends DataTypeAstBuilder
         }
 
       case ctx: SampleByBucketContext =>
+        if (isSystem) {
+          throw QueryParsingErrors.tableSampleSystemSampleMethodError("BUCKET", ctx)
+        }
         sample(ctx.numerator.getText.toDouble / ctx.denominator.getText.toDouble, seed)
     }
   }
@@ -2535,17 +2635,17 @@ class AstBuilder extends DataTypeAstBuilder
     withOrigin(ctx) {
       val relation = createUnresolvedRelation(ctx.identifierReference, Option(ctx.optionsClause))
       val options = resolveOptions(Option(ctx.optionsClause))
-      val changelogInfo = buildChangelogInfo(ctx.changesClause, options)
-      val result = RelationChanges(relation, changelogInfo)
+      val changelogContext = buildChangelogContext(ctx.changesClause, options)
+      val result = RelationChanges(relation, changelogContext)
       mayApplyAliasPlan(ctx.tableAlias, result)
     }
 
   /**
-   * Build a [[ChangelogInfo]] from a batch changesClause context and optional WITH options.
+   * Build a [[ChangelogContext]] from a batch changesClause context and optional WITH options.
    */
-  private def buildChangelogInfo(
+  private def buildChangelogContext(
       ctx: ChangesClauseContext,
-      options: CaseInsensitiveStringMap): ChangelogInfo = {
+      options: CaseInsensitiveStringMap): ChangelogContext = {
     val startExclusive = ctx.startExclusive != null
     val endExclusive = ctx.endExclusive != null
     val startInclusive = !startExclusive
@@ -2590,16 +2690,16 @@ class AstBuilder extends DataTypeAstBuilder
     }
 
     val (deduplicationMode, computeUpdates) = resolveChangelogOptions(options)
-    new ChangelogInfo(range, deduplicationMode, computeUpdates)
+    new ChangelogContext(range, deduplicationMode, computeUpdates)
   }
 
   /**
-   * Build a [[ChangelogInfo]] from a streaming streamChangesClause context and optional
+   * Build a [[ChangelogContext]] from a streaming streamChangesClause context and optional
    * WITH options.
    */
-  private def buildStreamChangelogInfo(
+  private def buildStreamChangelogContext(
       ctx: StreamChangesClauseContext,
-      options: CaseInsensitiveStringMap): ChangelogInfo = {
+      options: CaseInsensitiveStringMap): ChangelogContext = {
     val startExclusive = ctx.startExclusive != null
     val startInclusive = !startExclusive
 
@@ -2630,7 +2730,7 @@ class AstBuilder extends DataTypeAstBuilder
     }
 
     val (deduplicationMode, computeUpdates) = resolveChangelogOptions(options)
-    new ChangelogInfo(range, deduplicationMode, computeUpdates)
+    new ChangelogContext(range, deduplicationMode, computeUpdates)
   }
 
   /**
@@ -2638,17 +2738,8 @@ class AstBuilder extends DataTypeAstBuilder
    * Defaults: DROP_CARRYOVERS for deduplicationMode, false for computeUpdates.
    */
   private def resolveChangelogOptions(
-      options: CaseInsensitiveStringMap)
-      : (ChangelogInfo.DeduplicationMode, Boolean) = {
-    val deduplicationModeStr = Option(options.get("deduplicationMode"))
-      .getOrElse("dropCarryovers").toLowerCase(Locale.ROOT)
-    val deduplicationMode = deduplicationModeStr match {
-      case "none" => ChangelogInfo.DeduplicationMode.NONE
-      case "dropcarryovers" => ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS
-      case "netchanges" => ChangelogInfo.DeduplicationMode.NET_CHANGES
-      case other =>
-        throw QueryCompilationErrors.invalidCdcOptionInvalidDeduplicationMode(other)
-    }
+      options: CaseInsensitiveStringMap): (ChangelogContext.DeduplicationMode, Boolean) = {
+    val deduplicationMode = ChangelogContextUtils.parseDeduplicationMode(options)
     val computeUpdates = options.getBoolean("computeUpdates", false)
     (deduplicationMode, computeUpdates)
   }
@@ -2811,8 +2902,8 @@ class AstBuilder extends DataTypeAstBuilder
       case Some(changesCtx) =>
         // Streaming CDC: wrap in RelationChanges and NamedStreamingRelation
         val options = resolveOptions(Option(ctx.optionsClause))
-        val changelogInfo = buildStreamChangelogInfo(changesCtx, options)
-        val result = RelationChanges(relation, changelogInfo)
+        val changelogContext = buildStreamChangelogContext(changesCtx, options)
+        val result = RelationChanges(relation, changelogContext)
         val table = mayApplyAliasPlan(ctx.tableAlias, result)
         val tableWithWatermark = table.optionalMap(ctx.watermarkClause)(withWatermark)
         val sourceNameOpt = extractSourceName(ctx.identifiedByClause)
@@ -2870,7 +2961,7 @@ class AstBuilder extends DataTypeAstBuilder
         // inline table comes in two styles:
         // style 1: values (1), (2), (3)  -- multiple columns are supported
         // style 2: values 1, 2, 3  -- only a single column is supported here
-        // Strip Alias wrappers from row values — CreateStruct.apply preserves them for
+        // Strip Alias wrappers from row values - CreateStruct.apply preserves them for
         // expressions like `(1 AS id, 'a' AS name)`, but they are redundant here since
         // column names are determined by the table alias or generated defaults.
         case struct: CreateNamedStruct => struct.valExprs.map {
@@ -3486,11 +3577,7 @@ class AstBuilder extends DataTypeAstBuilder
   }
 
   override def visitCollateClause(ctx: CollateClauseContext): Seq[String] = withOrigin(ctx) {
-    val collationName = visitMultipartIdentifier(ctx.collationName)
-    if (!SQLConf.get.trimCollationEnabled && collationName.last.toUpperCase().contains("TRIM")) {
-      throw QueryCompilationErrors.trimCollationNotEnabledError()
-    }
-    collationName
+    visitMultipartIdentifier(ctx.collationName)
   }
 
   /**
@@ -4286,8 +4373,12 @@ class AstBuilder extends DataTypeAstBuilder
   protected def createUnresolvedTableOrView(
       ctx: IdentifierReferenceContext,
       commandName: String,
-      allowTempView: Boolean = true): LogicalPlan = withOrigin(ctx) {
-    withIdentClause(ctx, UnresolvedTableOrView(_, commandName, allowTempView))
+      allowTempView: Boolean = true,
+      tableNotFoundSearchPathMode: UnresolvedTableOrViewSearchPathMode =
+        UnresolvedTableOrViewSearchPathMode.Ddl): LogicalPlan = withOrigin(ctx) {
+    withIdentClause(
+      ctx,
+      UnresolvedTableOrView(_, commandName, allowTempView, tableNotFoundSearchPathMode))
   }
 
   private def createUnresolvedTableOrView(
@@ -5590,42 +5681,45 @@ class AstBuilder extends DataTypeAstBuilder
         bucketSpec.map(_.asTransform) ++
         clusterBySpec.map(_.asTransform)
 
-    val asSelectPlan = Option(ctx.query).map(plan).toSeq
-    withIdentClause(identifierContext, asSelectPlan, (identifiers, otherPlans) => {
-      val namedConstraints =
-        constraints.map(c => c.withTableName(identifiers.last))
-      val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment,
-        collation, serdeInfo, external, namedConstraints)
-      val identifier = withOrigin(identifierContext) {
-        UnresolvedIdentifier(identifiers)
-      }
-      otherPlans.headOption match {
-        case Some(_) if columns.nonEmpty =>
+    Option(ctx.query).map(plan) match {
+      case Some(query) =>
+        // CTAS path: push the identifier placeholder into the `name` slot so that
+        // `CTESubstitution` sees the `CreateTableAsSelect` (a `CTEInChildren`) directly
+        // and places `WithCTE` on its children (SPARK-46625). CTAS disallows constraints /
+        // user-specified columns / non-reference partition columns, so we don't need the
+        // identifier parts at parse time.
+        if (columns.nonEmpty) {
           operationNotAllowed(
-            "Schema may not be specified in a Create Table As Select (CTAS) statement",
-            ctx)
-
-        case Some(_) if partCols.nonEmpty =>
-          // non-reference partition columns are not allowed because schema can't be specified
+            "Schema may not be specified in a Create Table As Select (CTAS) statement", ctx)
+        }
+        if (partCols.nonEmpty) {
           operationNotAllowed(
-            "Partition column types may not be specified in Create Table As Select (CTAS)",
-            ctx)
-
-        case Some(_) if constraints.nonEmpty =>
+            "Partition column types may not be specified in Create Table As Select (CTAS)", ctx)
+        }
+        if (constraints.nonEmpty) {
           operationNotAllowed(
-            "Constraints may not be specified in a Create Table As Select (CTAS) statement",
-            ctx)
-
-        case Some(query) =>
-          CreateTableAsSelect(identifier, partitioning, query, tableSpec, Map.empty, ifNotExists)
-
-        case _ =>
+            "Constraints may not be specified in a Create Table As Select (CTAS) statement", ctx)
+        }
+        val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment,
+          collation, serdeInfo, external, constraints = Nil)
+        val nameSlot = withIdentClause(identifierContext, identifiers =>
+          withOrigin(identifierContext) { UnresolvedIdentifier(identifiers) })
+        CreateTableAsSelect(nameSlot, partitioning, query, tableSpec, Map.empty, ifNotExists)
+      case None =>
+        withIdentClause(identifierContext, identifiers => {
+          val namedConstraints =
+            constraints.map(c => c.withTableName(identifiers.last))
+          val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment,
+            collation, serdeInfo, external, namedConstraints)
+          val identifier = withOrigin(identifierContext) {
+            UnresolvedIdentifier(identifiers)
+          }
           // Note: table schema includes both the table columns list and the partition columns
           // with data type.
           val allColumns = columns ++ partCols
           CreateTable(identifier, allColumns, partitioning, tableSpec, ignoreIfExists = ifNotExists)
-      }
-    })
+        })
+    }
   }
 
   /**
@@ -5674,43 +5768,42 @@ class AstBuilder extends DataTypeAstBuilder
         clusterBySpec.map(_.asTransform)
 
     val identifierContext = ctx.replaceTableHeader().identifierReference()
-    val asSelectPlan = Option(ctx.query).map(plan).toSeq
-    withIdentClause(identifierContext, asSelectPlan, (identifiers, otherPlans) => {
-      val namedConstraints =
-        constraints.map(c => c.withTableName(identifiers.last))
-      val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment,
-        collation, serdeInfo, external = false, namedConstraints)
-      val identifier = withOrigin(identifierContext) {
-        UnresolvedIdentifier(identifiers)
-      }
-      otherPlans.headOption match {
-        case Some(_) if columns.nonEmpty =>
+    Option(ctx.query).map(plan) match {
+      case Some(query) =>
+        // RTAS path: push the identifier placeholder into the `name` slot (see CTAS above).
+        if (columns.nonEmpty) {
           operationNotAllowed(
-            "Schema may not be specified in a Replace Table As Select (RTAS) statement",
-            ctx)
-
-        case Some(_) if partCols.nonEmpty =>
-          // non-reference partition columns are not allowed because schema can't be specified
+            "Schema may not be specified in a Replace Table As Select (RTAS) statement", ctx)
+        }
+        if (partCols.nonEmpty) {
           operationNotAllowed(
-            "Partition column types may not be specified in Replace Table As Select (RTAS)",
-            ctx)
-
-        case Some(_) if constraints.nonEmpty =>
+            "Partition column types may not be specified in Replace Table As Select (RTAS)", ctx)
+        }
+        if (constraints.nonEmpty) {
           operationNotAllowed(
-            "Constraints may not be specified in a Replace Table As Select (RTAS) statement",
-            ctx)
-
-        case Some(query) =>
-          ReplaceTableAsSelect(identifier, partitioning, query, tableSpec,
-            writeOptions = Map.empty, orCreate = orCreate)
-
-        case _ =>
+            "Constraints may not be specified in a Replace Table As Select (RTAS) statement", ctx)
+        }
+        val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment,
+          collation, serdeInfo, external = false, constraints = Nil)
+        val nameSlot = withIdentClause(identifierContext, identifiers =>
+          withOrigin(identifierContext) { UnresolvedIdentifier(identifiers) })
+        ReplaceTableAsSelect(nameSlot, partitioning, query, tableSpec,
+          writeOptions = Map.empty, orCreate = orCreate)
+      case None =>
+        withIdentClause(identifierContext, identifiers => {
+          val namedConstraints =
+            constraints.map(c => c.withTableName(identifiers.last))
+          val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment,
+            collation, serdeInfo, external = false, namedConstraints)
+          val identifier = withOrigin(identifierContext) {
+            UnresolvedIdentifier(identifiers)
+          }
           // Note: table schema includes both the table columns list and the partition columns
           // with data type.
           val allColumns = columns ++ partCols
           ReplaceTable(identifier, allColumns, partitioning, tableSpec, orCreate = orCreate)
-      }
-    })
+        })
+    }
   }
 
   /**
@@ -6452,35 +6545,74 @@ class AstBuilder extends DataTypeAstBuilder
    * }}}
    */
   override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) {
-    val query = Option(ctx.query).map(plan)
-    withIdentClause(ctx.identifierReference, query.toSeq, (ident, children) => {
-      if (query.isDefined && ident.length > 1) {
-        val catalogAndNamespace = ident.init
-        throw QueryParsingErrors.addCatalogInCacheTableAsSelectNotAllowedError(
-          catalogAndNamespace.quoted, ctx)
-      }
-      val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty)
-      val isLazy = ctx.LAZY != null
-      if (query.isDefined) {
+    val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty)
+    val isLazy = ctx.LAZY != null
+    Option(ctx.query).map(plan) match {
+      case Some(query) =>
         // Disallow parameter markers in the query of the cache.
         // We need this limitation because we store the original query text, pre substitution.
-        // To lift this we would need to reconstitute the query with parameter markers replaced with
-        // the values given at CACHE TABLE time, or we would need to store the parameter values
-        // alongside the text.
-        // The same rule can be found in CREATE VIEW builder.
-        checkInvalidParameter(query.get, "the query of CACHE TABLE")
-        CacheTableAsSelect(ident.head, children.head, source(ctx.query()), isLazy, options)
-      } else {
-        CacheTable(
-          createUnresolvedRelation(
-            ctx.identifierReference,
-            ident,
-            None,
-            writePrivileges = Set.empty,
-            isStreaming = false),
-          ident, isLazy, options)
+        // To lift this we would need to reconstitute the query with parameter markers replaced
+        // with the values given at CACHE TABLE time, or we would need to store the parameter
+        // values alongside the text. The same rule can be found in CREATE VIEW builder.
+        checkInvalidParameter(query, "the query of CACHE TABLE")
+        // `CacheTableAsSelect.tempViewName` is an `Expression` slot: a `Literal` for direct
+        // identifiers and `IDENTIFIER('literal-string')`, or an
+        // `ExpressionWithUnresolvedIdentifier` for `IDENTIFIER(<non-literal>)`. Building the name
+        // as an expression avoids the wrap-the-whole-command form (where the
+        // `PlanWithUnresolvedIdentifier` would wrap the entire `CacheTableAsSelect`), which is the
+        // last shape that motivated the `WithCTE(<command>, _)` workaround chain in SPARK-46625.
+        val nameExpr = buildCacheTableAsSelectName(ctx.identifierReference, ctx)
+        CacheTableAsSelect(nameExpr, query, source(ctx.query()), isLazy, options)
+      case None =>
+        withIdentClause(ctx.identifierReference, ident => {
+          CacheTable(
+            createUnresolvedRelation(
+              ctx.identifierReference,
+              ident,
+              None,
+              writePrivileges = Set.empty,
+              isStreaming = false),
+            ident, isLazy, options)
+        })
+    }
+  }
+
+  /**
+   * Build the `tempViewName` expression for a `CACHE TABLE ... AS SELECT` command from an
+   * `identifierReference` context.
+   *
+   * `CacheTableAsSelect` requires a single-part temp view name (no catalog/namespace). For direct
+   * identifiers and `IDENTIFIER('literal-string')` we validate this at parse time and produce a
+   * non-null string `Literal`. For `IDENTIFIER(<non-literal>)` we emit an
+   * `ExpressionWithUnresolvedIdentifier` whose builder validates the single-part invariant when
+   * the identifier expression is resolved.
+   */
+  private def buildCacheTableAsSelectName(
+      ctx: IdentifierReferenceContext,
+      parentCtx: CacheTableContext): Expression = {
+    // Use the outer `parentCtx` for the multi-part error so the query context points at the
+    // whole `CACHE TABLE ... AS ...` statement, not just the identifier reference. The caller
+    // (`visitCacheTable`) already has `withOrigin(parentCtx)` in scope.
+    def singlePart(parts: Seq[String]): String = {
+      if (parts.length > 1) {
+        throw QueryParsingErrors.addCatalogInCacheTableAsSelectNotAllowedError(
+          parts.init.quoted, parentCtx)
       }
-    })
+      parts.head
+    }
+    val exprCtx = ctx.expression
+    if (exprCtx != null) {
+      expression(exprCtx) match {
+        case Literal(value, _: StringType) if value != null =>
+          Literal(singlePart(parseMultipartIdentifier(value.toString)))
+        case expr =>
+          new ExpressionWithUnresolvedIdentifier(
+            withOrigin(exprCtx) { expr },
+            parts => Literal(singlePart(parts)))
+      }
+    } else {
+      Literal(singlePart(visitMultipartIdentifier(ctx.multipartIdentifier)))
+    }
   }
 
   /**
@@ -7019,11 +7151,13 @@ class AstBuilder extends DataTypeAstBuilder
       dataTypeOpt.map { dt => default.copy(child = Cast(default.child, dt)) }.getOrElse(default)
     }
     CreateVariable(
-      ctx.identifierReferences.asScala.map (
-        identifierReference => {
-          withIdentClause(identifierReference, UnresolvedIdentifier(_))
-        }
-      ).toSeq,
+      ctx.identifierReferences.asScala.map { identifierReference =>
+        // Give each `UnresolvedIdentifier` its own origin pointing at the variable name
+        // fragment so analyzer-time errors (e.g. UNRESOLVED_VARIABLE) can highlight just
+        // that identifier rather than the whole `DECLARE ...` statement.
+        withIdentClause(identifierReference, parts =>
+          withOrigin(identifierReference) { UnresolvedIdentifier(parts) })
+      }.toSeq,
       defaultExpression,
       ctx.REPLACE() != null
     )
@@ -7039,7 +7173,8 @@ class AstBuilder extends DataTypeAstBuilder
    */
   override def visitDropVariable(ctx: DropVariableContext): LogicalPlan = withOrigin(ctx) {
     DropVariable(
-      withIdentClause(ctx.identifierReference(), UnresolvedIdentifier(_)),
+      withIdentClause(ctx.identifierReference(), parts =>
+        withOrigin(ctx.identifierReference()) { UnresolvedIdentifier(parts) }),
       ctx.EXISTS() != null
     )
   }
@@ -7164,7 +7299,7 @@ class AstBuilder extends DataTypeAstBuilder
       // The SET variable source is a query
       val variables = multipartIdentifierList.multipartIdentifier.asScala.map { variableIdent =>
         val varName = visitMultipartIdentifier(variableIdent)
-        UnresolvedAttribute(varName)
+        withOrigin(variableIdent) { UnresolvedAttribute(varName) }
       }.toSeq
       SetVariable(variables, visitQuery(query))
     } else {
@@ -7176,7 +7311,7 @@ class AstBuilder extends DataTypeAstBuilder
           case n: NamedExpression => n
           case e => Alias(e, varIdent.last)()
         }
-        (UnresolvedAttribute(varIdent), varNamedExpr)
+        (withOrigin(assign.key) { UnresolvedAttribute(varIdent) }, varNamedExpr)
       }.toSeq.unzip
       SetVariable(variables, Project(values, OneRowRelation()))
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParameterHandler.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParameterHandler.scala
index 715dccdc10737..77358a1eb139e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParameterHandler.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParameterHandler.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.catalyst.parser
 
 import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
+import org.apache.spark.sql.catalyst.types.DataTypeUtils
+import org.apache.spark.sql.util.SchemaUtils
 
 /**
  * Handler for parameter substitution across different Spark SQL contexts.
@@ -107,14 +109,39 @@ object ParameterHandler {
    * @param expr The expression to convert (must be a Literal)
    * @return SQL string representation
    */
-  private def convertToSql(expr: Expression): String = expr match {
-    case lit: Literal => lit.sql
-    case other =>
-      throw new IllegalArgumentException(
-        s"ParameterHandler only accepts resolved Literal expressions. " +
-        s"Received: ${other.getClass.getSimpleName}. " +
-        s"All parameters must be resolved using SparkSession.resolveAndValidateParameters " +
-        s"before being passed to the pre-parser.")
+  private def convertToSql(expr: Expression): String = {
+    // Converts an expression to its SQL representation. If the expression's type contains collated
+    // types, strips collations from nested literals and wraps the whole expression in
+    // CAST to preserve the collation with implicit strength. Without this, Literal.sql
+    // produces `'value' COLLATE collationName` which re-parses with explicit strength.
+    def toSqlWithImplicitCollation(e: Expression): String = {
+      if (!DataTypeUtils.hasNonDefaultStringCharOrVarcharType(e.dataType)) {
+        e.sql
+      } else {
+        val stripped = e.transform {
+          case lit: Literal
+              if DataTypeUtils.hasNonDefaultStringCharOrVarcharType(lit.dataType) =>
+            Literal.create(
+              lit.value, SchemaUtils.replaceCollatedStringWithString(lit.dataType))
+        }
+        s"CAST(${stripped.sql} AS ${e.dataType.sql})"
+      }
+    }
+
+    expr match {
+      case lit: Literal if lit.value == null =>
+        // NULL literals should have default collation strength, so even though `lit.sql` will wrap
+        // NULL in a CAST, `CollationTypeCoercion` will resolve it as default.
+        lit.sql
+      case lit: Literal =>
+        toSqlWithImplicitCollation(lit)
+      case other =>
+        throw new IllegalArgumentException(
+          s"ParameterHandler only accepts resolved Literal expressions. " +
+          s"Received: ${other.getClass.getSimpleName}. " +
+          s"All parameters must be resolved using SparkSession.resolveAndValidateParameters " +
+          s"before being passed to the pre-parser.")
+    }
   }
 
   /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
index b95c4624b8c50..c4af18fc8739a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
@@ -432,7 +432,7 @@ object ExtractSingleColumnNullAwareAntiJoin extends JoinSelectionHelper with Pre
  *  - the read relation that can be either [[DataSourceV2Relation]] or [[DataSourceV2ScanRelation]]
  *  depending on whether the planning has already happened;
  */
-object GroupBasedRowLevelOperation {
+object GroupBasedRowLevelOperation extends RowLevelOperationExtractor {
   type ReturnType = (ReplaceData, Expression, Option[Expression], LogicalPlan)
 
   def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
@@ -445,8 +445,34 @@ object GroupBasedRowLevelOperation {
     case _ =>
       None
   }
+}
+
+/**
+ * An extractor for row-level commands such as DELETE, UPDATE, MERGE that were rewritten using plans
+ * that operate on individual rows (row deltas).
+ *
+ * This class extracts the following entities:
+ *  - the delta-based rewrite plan;
+ *  - the condition that defines matching rows;
+ *  - the group filter condition;
+ *  - the read relation that can be either [[DataSourceV2Relation]] or [[DataSourceV2ScanRelation]]
+ *  depending on whether the planning has already happened;
+ */
+object DeltaBasedRowLevelOperation extends RowLevelOperationExtractor {
+  type ReturnType = (WriteDelta, Expression, Option[Expression], LogicalPlan)
+
+  def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
+    case wd @ WriteDelta(ExtractV2Table(table), cond, query, _, _, groupFilterCond, _) =>
+      val readRelation = findReadRelation(table, query, allowMultipleReads = false)
+      readRelation.map((wd, cond, groupFilterCond, _))
+
+    case _ =>
+      None
+  }
+}
 
-  private def findReadRelation(
+trait RowLevelOperationExtractor {
+  protected def findReadRelation(
       table: Table,
       plan: LogicalPlan,
       allowMultipleReads: Boolean): Option[LogicalPlan] = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
index 9f8c62fe58408..790307e44ec94 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
@@ -181,3 +181,65 @@ object LateralJoinType {
       )
   }
 }
+
+object NearestByDirection {
+
+  /** @see [[NearestByJoinValidation.SupportedDirections]] */
+  val supported: Seq[String] = NearestByJoinValidation.SupportedDirections
+
+  def apply(direction: String): NearestByDirection = {
+    direction.toLowerCase(Locale.ROOT) match {
+      case "distance" => NearestByDistance
+      case "similarity" => NearestBySimilarity
+      case _ =>
+        throw new AnalysisException(
+          errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_DIRECTION",
+          messageParameters = Map(
+            "direction" -> direction,
+            "supported" -> supported.mkString("'", "', '", "'")))
+    }
+  }
+}
+
+sealed abstract class NearestByDirection
+
+case object NearestByDistance extends NearestByDirection
+case object NearestBySimilarity extends NearestByDirection
+
+object NearestByJoinType {
+
+  /** @see [[NearestByJoinValidation.SupportedJoinTypes]] */
+  val supported: Seq[String] = NearestByJoinValidation.SupportedJoinTypes
+
+  /** @see [[NearestByJoinValidation.SupportedJoinTypeDisplay]] */
+  val supportedDisplay: String = NearestByJoinValidation.SupportedJoinTypeDisplay
+
+  def apply(typ: String): JoinType = typ.toLowerCase(Locale.ROOT).replace("_", "") match {
+    case "inner" => Inner
+    case "leftouter" | "left" => LeftOuter
+    case _ =>
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+        messageParameters = Map(
+          "joinType" -> typ,
+          "supported" -> supportedDisplay))
+  }
+}
+
+object NearestByJoinMode {
+
+  /** @see [[NearestByJoinValidation.SupportedModes]] */
+  val supported: Seq[String] = NearestByJoinValidation.SupportedModes
+
+  /** Returns true for APPROX, false for EXACT. */
+  def apply(mode: String): Boolean = mode.toLowerCase(Locale.ROOT) match {
+    case "approx" => true
+    case "exact" => false
+    case _ =>
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_MODE",
+        messageParameters = Map(
+          "mode" -> mode,
+          "supported" -> supported.mkString("'", "', '", "'")))
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NearestByJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NearestByJoin.scala
new file mode 100644
index 0000000000000..4aaac7dfe546a
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/NearestByJoin.scala
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
+import org.apache.spark.sql.catalyst.plans.{Inner, JoinType, LeftOuter, NearestByDirection, NearestByJoinValidation}
+import org.apache.spark.sql.catalyst.trees.TreePattern._
+
+object NearestByJoin {
+  /** @see [[NearestByJoinValidation.MaxNumResults]] */
+  val MaxNumResults: Int = NearestByJoinValidation.MaxNumResults
+}
+
+/**
+ * A logical plan for a nearest-by top-K ranking join. For each row on the left side it returns
+ * up to `numResults` rows from the right side ordered by `rankingExpression`:
+ *   - `NearestByDistance`: smallest values of `rankingExpression` first.
+ *   - `NearestBySimilarity`: largest values of `rankingExpression` first.
+ *
+ * The `approx` field records the user's APPROX/EXACT choice. Today both modes use the same
+ * brute-force rewrite. The flag is preserved on the logical plan so that future indexed
+ * approximate-nearest-neighbor strategies can fire only when `approx = true`, leaving EXACT
+ * queries unaffected.
+ *
+ * @param left  The left (query) side of the join.
+ * @param right The right (base) side of the join, against which each left row finds matches.
+ * @param joinType  Must be `Inner` or `LeftOuter`. `Inner` drops left rows with no matches;
+ *                  `LeftOuter` preserves them with `NULL` right-side columns.
+ * @param approx  `true` for `APPROX` mode, `false` for `EXACT` mode. `APPROX` is the contract
+ *                future indexed approximate-nearest-neighbor strategies key off.
+ * @param numResults  The K in top-K: the maximum number of right-side matches returned per
+ *                    left row. Bounded above by `NearestByJoin.MaxNumResults`.
+ * @param rankingExpression  Scalar expression evaluated per (left, right) pair. Must return
+ *                           an orderable type. Rows are ranked by its value, with ordering
+ *                           determined by `direction`.
+ * @param direction  `NearestByDistance` (smaller is better) or `NearestBySimilarity` (larger
+ *                   is better). Selects whether the rewrite uses `MIN_BY` or `MAX_BY`.
+ */
+case class NearestByJoin(
+    left: LogicalPlan,
+    right: LogicalPlan,
+    joinType: JoinType,
+    approx: Boolean,
+    numResults: Int,
+    rankingExpression: Expression,
+    direction: NearestByDirection)
+  extends BinaryNode with SupportsNonDeterministicExpression {
+
+  require(Seq(Inner, LeftOuter).contains(joinType),
+    s"Unsupported nearest-by join type $joinType")
+
+  // Both APPROX and EXACT permit a nondeterministic ranking expression (e.g. `rand()` for
+  // randomized tie-breaking, or an external scoring UDF).
+  override def allowNonDeterministicExpression: Boolean = true
+
+  // Both left- and right-side attributes are declared nullable to match the schema produced
+  // by `RewriteNearestByJoin`. Right-side attributes are widened because the rewrite
+  // materializes them through `Inline` over `MaxMinByK`'s `ArrayType(.., containsNull = true)`,
+  // which widens every struct field to nullable. Left-side attributes are widened because the
+  // rewrite carries each left column through a `First` aggregate, whose result type is always
+  // nullable (`First` may return `null` for empty groups). Declaring both nullable here keeps
+  // the analyzed schema consistent with the optimized plan (and with what users see in cached
+  // or written outputs).
+  override def output: Seq[Attribute] =
+    left.output.map(_.withNullability(true)) ++ right.output.map(_.withNullability(true))
+
+  def duplicateResolved: Boolean = left.outputSet.intersect(right.outputSet).isEmpty
+
+  override lazy val resolved: Boolean = {
+    childrenResolved &&
+      expressions.forall(_.resolved) &&
+      duplicateResolved
+  }
+
+  final override val nodePatterns: Seq[TreePattern] = Seq(NEAREST_BY_JOIN)
+
+  override protected def withNewChildrenInternal(
+      newLeft: LogicalPlan, newRight: LogicalPlan): NearestByJoin = {
+    copy(left = newLeft, right = newRight)
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 8e9f264698caf..6d37aa0f9f6b1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -1912,6 +1912,14 @@ object SubqueryAlias {
   }
 }
 
+sealed trait SampleMethod extends Serializable
+object SampleMethod {
+  /** Row-level sampling (BERNOULLI). Each row independently selected. No I/O savings. */
+  case object Bernoulli extends SampleMethod
+  /** System-level sampling (SYSTEM). Entire partitions/splits included or skipped. */
+  case object System extends SampleMethod
+}
+
 object Sample {
   /**
    * Convenience constructor that wraps a concrete seed in [[Some]].
@@ -1926,6 +1934,16 @@ object Sample {
       child: LogicalPlan): Sample = {
     new Sample(lowerBound, upperBound, withReplacement, Some(seed), child)
   }
+
+  def apply(
+      lowerBound: Double,
+      upperBound: Double,
+      withReplacement: Boolean,
+      seed: Long,
+      child: LogicalPlan,
+      sampleMethod: SampleMethod): Sample = {
+    new Sample(lowerBound, upperBound, withReplacement, Some(seed), child, sampleMethod)
+  }
 }
 
 /**
@@ -1939,13 +1957,15 @@ object Sample {
  *             (SQL `REPEATABLE` clause or programmatic API), `None` when no seed was
  *             specified and a random seed should be generated at execution time.
  * @param child the LogicalPlan
+ * @param sampleMethod the sampling method (Bernoulli or System)
  */
 case class Sample(
     lowerBound: Double,
     upperBound: Double,
     withReplacement: Boolean,
     seed: Option[Long],
-    child: LogicalPlan) extends UnaryNode {
+    child: LogicalPlan,
+    sampleMethod: SampleMethod = SampleMethod.Bernoulli) extends UnaryNode {
 
   val eps = RandomSampler.roundingEpsilon
   val fraction = upperBound - lowerBound
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
index c38377582c156..4fbe71ed7d3e5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike}
 import org.apache.spark.sql.connector.catalog.ColumnDefaultValue
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.types.DataType
+import org.apache.spark.util.collection.BitSet
 
 /**
  * A logical plan node that contains exactly what was parsed from SQL.
@@ -188,7 +189,11 @@ case class InsertIntoStatement(
     byName: Boolean = false,
     replaceCriteriaOpt: Option[InsertReplaceCriteria] = None,
     withSchemaEvolution: Boolean = false)
-  extends UnaryParsedStatement {
+  // Extends TransactionalWrite so that QueryExecution can detect a potential transaction on the
+  // unresolved logical plan before analysis runs. InsertIntoStatement is shared between V1 and V2
+  // inserts, but the LookupCatalog.TransactionalWrite extractor only matches when the target
+  // catalog implements TransactionalCatalogPlugin, so V1 inserts are never assigned a transaction.
+  extends UnaryParsedStatement with TransactionalWrite {
 
   require(overwrite || !ifPartitionNotExists,
     "IF NOT EXISTS is only valid in INSERT OVERWRITE")
@@ -206,6 +211,16 @@ case class InsertIntoStatement(
   override def child: LogicalPlan = query
   override protected def withNewChildInternal(newChild: LogicalPlan): InsertIntoStatement =
     copy(query = newChild)
+
+  // `table` is a non-child LogicalPlan slot (`child = query`), so the default tree-pattern
+  // propagation in TreeNode/QueryPlan does not see patterns inside it. Add `table`'s bits here
+  // so that `containsPattern(...)` pruning correctly reports patterns living in `table`
+  // (e.g. `PARAMETER`, `PLAN_WITH_UNRESOLVED_IDENTIFIER`).
+  override protected def getDefaultTreePatternBits: BitSet = {
+    val bits = super.getDefaultTreePatternBits
+    bits.union(table.treePatternBits)
+    bits
+  }
 }
 
 sealed abstract class InsertReplaceCriteria extends Expression with Unevaluable {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
index f63de8d1e4656..40cf5009b97dc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
@@ -44,6 +44,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{BooleanType, DataType, IntegerType, MapType, MetadataBuilder, StringType, StructType}
 import org.apache.spark.util.ArrayImplicits._
 import org.apache.spark.util.Utils
+import org.apache.spark.util.collection.BitSet
 
 // For v2 DML commands, it may end up with the v1 fallback code path and need to build a DataFrame
 // which is required by the DS v1 API. We need to keep the analyzed input query plan to build
@@ -106,6 +107,18 @@ trait V2WriteCommand
 
   override def child: LogicalPlan = query
 
+  // `table` is a non-child slot, so the default tree-pattern propagation in TreeNode/QueryPlan
+  // does not see patterns inside it. Add `table`'s bits so that `containsPattern(...)` pruning
+  // correctly reports patterns living in `table` (e.g. `PLAN_WITH_UNRESOLVED_IDENTIFIER`,
+  // `PARAMETER`). Only `OverwriteByExpression` is constructed at parse time with a placeholder
+  // in `table`, but applying this uniformly across all `V2WriteCommand`s keeps the invariant
+  // consistent for any future analyzer-built node that lands a placeholder in the same slot.
+  override protected def getDefaultTreePatternBits: BitSet = {
+    val bits = super.getDefaultTreePatternBits
+    bits.union(table.treePatternBits)
+    bits
+  }
+
   override lazy val resolved: Boolean = table.resolved && query.resolved && outputResolved
 
   def outputResolved: Boolean = {
@@ -141,6 +154,12 @@ trait V2WriteCommand
   def withNewTable(newTable: NamedRelation): V2WriteCommand
 }
 
+/** Trait for streaming write commands that participate in DSv2 transactions. */
+trait V2StreamingWriteCommand extends TransactionalWrite {
+  override def table: NamedRelation
+  def withNewTable(newTable: NamedRelation): V2StreamingWriteCommand
+}
+
 trait V2PartitionCommand extends UnaryCommand {
   def table: LogicalPlan
   def allowPartialPartitionSpec: Boolean = false
@@ -157,7 +176,7 @@ case class AppendData(
     isByName: Boolean,
     withSchemaEvolution: Boolean,
     write: Option[Write] = None,
-    analyzedQuery: Option[LogicalPlan] = None) extends V2WriteCommand {
+    analyzedQuery: Option[LogicalPlan] = None) extends V2WriteCommand with TransactionalWrite {
   override val writePrivileges: Set[TableWritePrivilege] = Set(TableWritePrivilege.INSERT)
   override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery)
   override def withNewTable(newTable: NamedRelation): AppendData = copy(table = newTable)
@@ -194,6 +213,26 @@ object AppendData {
   }
 }
 
+/**
+ * Append data to an existing table as the result of an insert-only MERGE rewrite.
+ *
+ * Functionally equivalent to [[AppendData]] but distinguishes the row-level MERGE rewrite path.
+ */
+case class InsertOnlyMerge(
+    table: NamedRelation,
+    query: LogicalPlan,
+    write: Option[Write] = None,
+    analyzedQuery: Option[LogicalPlan] = None) extends V2WriteCommand with TransactionalWrite {
+  override val isByName: Boolean = false
+  override val withSchemaEvolution: Boolean = false
+  override val writePrivileges: Set[TableWritePrivilege] = Set(TableWritePrivilege.INSERT)
+  override def withNewQuery(newQuery: LogicalPlan): InsertOnlyMerge = copy(query = newQuery)
+  override def withNewTable(newTable: NamedRelation): InsertOnlyMerge = copy(table = newTable)
+  override def storeAnalyzedQuery(): Command = copy(analyzedQuery = Some(query))
+  override protected def withNewChildInternal(newChild: LogicalPlan): InsertOnlyMerge =
+    copy(query = newChild)
+}
+
 /**
  * Overwrite data matching a filter in an existing table.
  */
@@ -205,7 +244,7 @@ case class OverwriteByExpression(
     isByName: Boolean,
     withSchemaEvolution: Boolean,
     write: Option[Write] = None,
-    analyzedQuery: Option[LogicalPlan] = None) extends V2WriteCommand {
+    analyzedQuery: Option[LogicalPlan] = None) extends V2WriteCommand with TransactionalWrite {
   override val writePrivileges: Set[TableWritePrivilege] =
     Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE)
   override lazy val resolved: Boolean = {
@@ -265,7 +304,7 @@ case class OverwritePartitionsDynamic(
     writeOptions: Map[String, String],
     isByName: Boolean,
     withSchemaEvolution: Boolean,
-    write: Option[Write] = None) extends V2WriteCommand {
+    write: Option[Write] = None) extends V2WriteCommand with TransactionalWrite {
   override val writePrivileges: Set[TableWritePrivilege] =
     Set(TableWritePrivilege.INSERT, TableWritePrivilege.DELETE)
   override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = {
@@ -425,6 +464,7 @@ case class ReplaceData(
  * @param query a query with a delta of records that should written
  * @param originalTable a plan for the original table for which the row-level command was triggered
  * @param projections projections for row ID, row, metadata attributes
+ * @param groupFilterCondition a condition that can be used to filter groups at runtime
  * @param write a logical write, if already constructed
  */
 case class WriteDelta(
@@ -433,6 +473,7 @@ case class WriteDelta(
     query: LogicalPlan,
     originalTable: NamedRelation,
     projections: WriteDeltaProjections,
+    groupFilterCondition: Option[Expression] = None,
     write: Option[DeltaWrite] = None) extends RowLevelWrite {
 
   override val isByName: Boolean = false
@@ -521,8 +562,10 @@ case class WriteDelta(
 trait V2CreateTableAsSelectPlan
   extends V2CreateTablePlan
     with AnalysisOnlyCommand
-    with CTEInChildren {
+    with CTEInChildren
+    with TransactionalWrite {
   def query: LogicalPlan
+  override def table: LogicalPlan = name
 
   override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = {
     withNameAndQuery(newName = name, newQuery = WithCTE(query, cteDefs))
@@ -949,6 +992,36 @@ case class DescribeColumn(
 
 object DescribeColumn {
   def getOutputAttrs: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes()
+
+  /**
+   * Extract the column nameParts from the (possibly resolved) column expression on a
+   * `DescribeColumn` command. Used by both the v1 rewrite in `ResolveSessionCatalog` and the
+   * v2 strategy case in `DataSourceV2Strategy` -- centralizing the unwrap means the two paths
+   * cannot drift.
+   *
+   * `ResolveReferences` typically resolves the column against the relation's `output`, so we
+   * see an `Attribute` here. The legacy `UnresolvedAttribute` form is also accepted (e.g. when
+   * the column name doesn't exist in the relation and resolution is skipped). `Alias`
+   * indicates a nested-column reference (`a.b`) which `ResolveReferences` rewrites to
+   * `Alias(GetStructField(...), b)` -- nested columns are unsupported on this command.
+   */
+  def extractColumnNameParts(column: org.apache.spark.sql.catalyst.expressions.Expression)
+      : Seq[String] = {
+    import org.apache.spark.SparkException
+    import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
+    import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute}
+    import org.apache.spark.sql.catalyst.util.toPrettySQL
+    import org.apache.spark.sql.errors.QueryCompilationErrors
+    column match {
+      case u: UnresolvedAttribute => u.nameParts
+      case a: Attribute => a.qualifier :+ a.name
+      case Alias(child, _) =>
+        throw QueryCompilationErrors.commandNotSupportNestedColumnError(
+          "DESC TABLE COLUMN", toPrettySQL(child))
+      case _ =>
+        throw SparkException.internalError(s"[BUG] unexpected column expression: $column")
+    }
+  }
 }
 
 /**
@@ -956,7 +1029,8 @@ object DescribeColumn {
  */
 case class DeleteFromTable(
     table: LogicalPlan,
-    condition: Expression) extends UnaryCommand with SupportsSubquery {
+    condition: Expression)
+  extends UnaryCommand with TransactionalWrite with SupportsSubquery {
   override def child: LogicalPlan = table
   override protected def withNewChildInternal(newChild: LogicalPlan): DeleteFromTable =
     copy(table = newChild)
@@ -978,7 +1052,8 @@ case class DeleteFromTableWithFilters(
 case class UpdateTable(
     table: LogicalPlan,
     assignments: Seq[Assignment],
-    condition: Option[Expression]) extends UnaryCommand with SupportsSubquery {
+    condition: Option[Expression])
+  extends UnaryCommand with TransactionalWrite with SupportsSubquery {
 
   lazy val aligned: Boolean = AssignmentUtils.aligned(table.output, assignments)
 
@@ -1011,7 +1086,10 @@ case class MergeIntoTable(
     notMatchedActions: Seq[MergeAction],
     notMatchedBySourceActions: Seq[MergeAction],
     withSchemaEvolution: Boolean)
-    extends BinaryCommand with WriteWithSchemaEvolution with SupportsSubquery {
+    extends BinaryCommand
+    with WriteWithSchemaEvolution
+    with SupportsSubquery
+    with TransactionalWrite {
 
   override val table: LogicalPlan = EliminateSubqueryAliases(targetTable)
 
@@ -1272,6 +1350,16 @@ case class Assignment(key: Expression, value: Expression) extends Expression
     newLeft: Expression, newRight: Expression): Assignment = copy(key = newLeft, value = newRight)
 }
 
+/**
+ * Marker trait for write operations that participate in a DSv2 transaction.
+ *
+ * Implementations are expected to target a DSv2 catalog backed by a
+ * [[org.apache.spark.sql.connector.catalog.TransactionalCatalogPlugin]].
+ */
+trait TransactionalWrite extends LogicalPlan {
+  def table: LogicalPlan
+}
+
 /**
  * The logical plan of the DROP TABLE command.
  *
@@ -1363,8 +1451,12 @@ case class ShowTablePartition(
 /**
  * The logical plan of the SHOW VIEWS command.
  *
- * Notes: v2 catalogs do not support views API yet, the command will fallback to
- * v1 ShowViewsCommand during ResolveSessionCatalog.
+ * Session-catalog targets fall back to v1 `ShowViewsCommand` via `ResolveSessionCatalog`.
+ * v2 [[org.apache.spark.sql.connector.catalog.ViewCatalog]] catalogs are handled in
+ * `DataSourceV2Strategy` (enumerates via
+ * [[org.apache.spark.sql.connector.catalog.ViewCatalog#listViews]]). Non-ViewCatalog v2
+ * catalogs are rejected up front in `ResolveSessionCatalog` with
+ * `MISSING_CATALOG_ABILITY.VIEWS`.
  */
 case class ShowViews(
     namespace: LogicalPlan,
@@ -1714,19 +1806,42 @@ case class RepairTable(
 
 /**
  * The logical plan of the ALTER VIEW ... AS command.
+ *
+ * Extends [[AnalysisOnlyCommand]] so [[Analyzer.HandleSpecialCommand]] captures
+ * `referredTempFunctions` from [[AnalysisContext]]; this list is needed by
+ * [[CheckViewReferences]] and by the v2 execs when the target is a non-session catalog.
+ * Session-catalog targets are still rewritten to [[AlterViewAsCommand]] by
+ * `ResolveSessionCatalog` and the captured value is dropped there (the v1 command re-captures).
  */
 case class AlterViewAs(
     child: LogicalPlan,
     originalText: String,
-    query: LogicalPlan) extends BinaryCommand with CTEInChildren {
-  override def left: LogicalPlan = child
-  override def right: LogicalPlan = query
+    query: LogicalPlan,
+    isAnalyzed: Boolean = false,
+    referredTempFunctions: Seq[String] = Seq.empty)
+  extends Command with AnalysisOnlyCommand with CTEInChildren {
+
+  override def childrenToAnalyze: Seq[LogicalPlan] = Seq(child, query)
+
+  override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = copy(
+    isAnalyzed = true,
+    referredTempFunctions = analysisContext.referredTempFunctionNames.toSeq)
+
   override protected def withNewChildrenInternal(
-      newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan =
-    copy(child = newLeft, query = newRight)
+      newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
+    assert(!isAnalyzed)
+    newChildren match {
+      case Seq(newChild, newQuery) =>
+        copy(child = newChild, query = newQuery)
+      case others =>
+        throw new SparkIllegalArgumentException(
+          errorClass = "_LEGACY_ERROR_TEMP_3218",
+          messageParameters = Map("others" -> others.toString()))
+    }
+  }
 
   override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = {
-    withNewChildren(Seq(child, WithCTE(query, cteDefs)))
+    copy(query = WithCTE(query, cteDefs))
   }
 }
 
@@ -1743,6 +1858,11 @@ case class AlterViewSchemaBinding(
 
 /**
  * The logical plan of the CREATE VIEW ... command.
+ *
+ * Extends [[AnalysisOnlyCommand]] so that [[Analyzer.HandleSpecialCommand]] captures
+ * `referredTempFunctions` from the [[AnalysisContext]] after the child query is analyzed;
+ * this list is needed for `verifyTemporaryObjectsNotExists`-style checks on downstream
+ * execution paths.
  */
 case class CreateView(
     child: LogicalPlan,
@@ -1754,15 +1874,32 @@ case class CreateView(
     query: LogicalPlan,
     allowExisting: Boolean,
     replace: Boolean,
-    viewSchemaMode: ViewSchemaMode) extends BinaryCommand with CTEInChildren {
-  override def left: LogicalPlan = child
-  override def right: LogicalPlan = query
+    viewSchemaMode: ViewSchemaMode,
+    isAnalyzed: Boolean = false,
+    referredTempFunctions: Seq[String] = Seq.empty)
+  extends Command with AnalysisOnlyCommand with CTEInChildren {
+
+  override def childrenToAnalyze: Seq[LogicalPlan] = Seq(child, query)
+
+  override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = copy(
+    isAnalyzed = true,
+    referredTempFunctions = analysisContext.referredTempFunctionNames.toSeq)
+
   override protected def withNewChildrenInternal(
-      newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan =
-    copy(child = newLeft, query = newRight)
+      newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
+    assert(!isAnalyzed)
+    newChildren match {
+      case Seq(newChild, newQuery) =>
+        copy(child = newChild, query = newQuery)
+      case others =>
+        throw new SparkIllegalArgumentException(
+          errorClass = "_LEGACY_ERROR_TEMP_3218",
+          messageParameters = Map("others" -> others.toString()))
+    }
+  }
 
   override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = {
-    withNewChildren(Seq(child, WithCTE(query, cteDefs)))
+    copy(query = WithCTE(query, cteDefs))
   }
 }
 
@@ -1830,7 +1967,7 @@ case class CacheTable(
  * The logical plan of the CACHE TABLE ... AS SELECT command.
  */
 case class CacheTableAsSelect(
-    tempViewName: String,
+    tempViewName: Expression,
     plan: LogicalPlan,
     originalText: String,
     isLazy: Boolean,
@@ -1838,6 +1975,19 @@ case class CacheTableAsSelect(
     isAnalyzed: Boolean = false,
     referredTempFunctions: Seq[String] = Seq.empty)
   extends AnalysisOnlyCommand with CTEInChildren {
+
+  /**
+   * Returns the temp view name string. Must only be called after analysis, when `tempViewName`
+   * has been resolved to a non-null string `Literal`. `CheckAnalysis` enforces this invariant.
+   */
+  def tempViewNameString: String = tempViewName match {
+    case Literal(value, _: StringType) if value != null => value.toString
+    case other =>
+      throw SparkException.internalError(
+        "CacheTableAsSelect.tempViewName must be a non-null string literal after analysis, " +
+          s"but got: ${other.sql}")
+  }
+
   override protected def withNewChildrenInternal(
       newChildren: IndexedSeq[LogicalPlan]): CacheTableAsSelect = {
     assert(!isAnalyzed)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
index 7956a9692dc61..a890d43f0672c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
@@ -176,6 +176,7 @@ object RuleIdCollection {
       "org.apache.spark.sql.catalyst.optimizer.ReplaceIntersectWithSemiJoin" ::
       "org.apache.spark.sql.catalyst.optimizer.ReplaceNullWithFalseInPredicate" ::
       "org.apache.spark.sql.catalyst.optimizer.RewriteAsOfJoin" ::
+      "org.apache.spark.sql.catalyst.optimizer.RewriteNearestByJoin" ::
       "org.apache.spark.sql.catalyst.optimizer.RewriteExceptAll" ::
       "org.apache.spark.sql.catalyst.optimizer.RewriteIntersectAll" ::
       "org.apache.spark.sql.catalyst.optimizer.SimplifyBinaryComparison" ::
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStream.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStream.scala
index 884a4165d077e..6e0583f778350 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStream.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStream.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.streaming.OutputMode
  */
 case class WriteToStream(
     name: String,
+    sinkName: Option[String],
     resolvedCheckpointLocation: String,
     sink: Table,
     outputMode: OutputMode,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStreamStatement.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStreamStatement.scala
index 7015d0dd3b2cc..61e64a526aeda 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStreamStatement.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/WriteToStreamStatement.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.streaming.{OutputMode, Trigger}
  * rule [[ResolveStreamWrite]].
  *
  * @param userSpecifiedName  Query name optionally specified by the user.
+ * @param userSpecifiedSinkName  Sink name optionally specified by the user for sink evolution.
  * @param userSpecifiedCheckpointLocation  Checkpoint location optionally specified by the user.
  * @param useTempCheckpointLocation  Whether to use a temporary checkpoint location when the user
  *                                   has not specified one. If false, then error will be thrown.
@@ -47,6 +48,7 @@ import org.apache.spark.sql.streaming.{OutputMode, Trigger}
  */
 case class WriteToStreamStatement(
     userSpecifiedName: Option[String],
+    userSpecifiedSinkName: Option[String],
     userSpecifiedCheckpointLocation: Option[String],
     useTempCheckpointLocation: Boolean,
     recoverFromCheckpointLocation: Boolean,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/transactions/TransactionUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/transactions/TransactionUtils.scala
new file mode 100644
index 0000000000000..b59733df0d343
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/transactions/TransactionUtils.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.transactions
+
+import java.util.UUID
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.TransactionalCatalogPlugin
+import org.apache.spark.sql.connector.catalog.transactions.{Transaction, TransactionInfoImpl}
+import org.apache.spark.util.Utils
+
+object TransactionUtils {
+  def commit(txn: Transaction): Unit = {
+    Utils.tryWithSafeFinally {
+      txn.commit()
+    } {
+      txn.close()
+    }
+  }
+
+  def abort(txn: Transaction): Unit = {
+    Utils.tryWithSafeFinally {
+      txn.abort()
+    } {
+      txn.close()
+    }
+  }
+
+  def beginTransaction(catalog: TransactionalCatalogPlugin): Transaction = {
+    val info = TransactionInfoImpl(id = UUID.randomUUID.toString)
+    val txn = catalog.beginTransaction(info)
+    if (txn.catalog.name != catalog.name) {
+      abort(txn)
+      throw SparkException.internalError(
+        s"Transaction catalog name (${txn.catalog.name}) " +
+          s"must match original catalog name (${catalog.name}).")
+    }
+    txn
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala
index 6af98240160bc..a18adc7ad8330 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala
@@ -29,7 +29,12 @@ object TreePattern extends Enumeration  {
   val ALIAS: Value = Value
   val ANALYSIS_AWARE_EXPRESSION: Value = Value
   val AND: Value = Value
+  val ARRAYS_OVERLAP: Value = Value
   val ARRAYS_ZIP: Value = Value
+  val ARRAY_DISTINCT: Value = Value
+  val ARRAY_EXCEPT: Value = Value
+  val ARRAY_INTERSECT: Value = Value
+  val ARRAY_UNION: Value = Value
   val ATTRIBUTE_REFERENCE: Value = Value
   val AVERAGE: Value = Value
   val BINARY_ARITHMETIC: Value = Value
@@ -154,6 +159,7 @@ object TreePattern extends Enumeration  {
   val LOGICAL_QUERY_STAGE: Value = Value
   val METRIC_VIEW_PLACEHOLDER: Value = Value
   val NATURAL_LIKE_JOIN: Value = Value
+  val NEAREST_BY_JOIN: Value = Value
   val NO_GROUPING_AGGREGATE_REFERENCE: Value = Value
   val OFFSET: Value = Value
   val OUTER_JOIN: Value = Value
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala
index d2d9f8e446263..f7ab8b06baf5b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala
@@ -302,6 +302,17 @@ object DataTypeUtils {
     }
   }
 
+  /**
+   * Returns true if the given data type contains any STRING/CHAR/VARCHAR with explicit collation
+   * (including explicit `UTF8_BINARY`), recursively checking nested types.
+   */
+  def hasNonDefaultStringCharOrVarcharType(dataType: DataType): Boolean = {
+    dataType.existsRecursively {
+      case st: StringType => !isDefaultStringCharOrVarcharType(st)
+      case _ => false
+    }
+  }
+
   /**
    * Recursively replaces all STRING, CHAR and VARCHAR types that do not have an explicit collation
    * with the same type but with explicit `UTF8_BINARY` collation.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
index 6f49b3998652c..af3b71182f6b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.types.ops.TypeOps
 import org.apache.spark.sql.catalyst.util.{ArrayData, CollationFactory, MapData, SQLOrderingUtil}
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteExactNumeric, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, Decimal, DecimalExactNumeric, DecimalType, DoubleExactNumeric, DoubleType, FloatExactNumeric, FloatType, FractionalType, GeographyType, GeometryType, IntegerExactNumeric, IntegerType, IntegralType, LongExactNumeric, LongType, MapType, NullType, NumericType, ShortExactNumeric, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}
-import org.apache.spark.unsafe.types.{ByteArray, GeographyVal, GeometryVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{BinaryView, ByteArray, UTF8String, VariantVal}
 import org.apache.spark.util.ArrayImplicits._
 
 sealed abstract class PhysicalDataType {
@@ -63,8 +63,9 @@ object PhysicalDataType {
     case StructType(fields) => PhysicalStructType(fields)
     case MapType(keyType, valueType, valueContainsNull) =>
       PhysicalMapType(keyType, valueType, valueContainsNull)
-    case _: GeometryType => PhysicalGeometryType
-    case _: GeographyType => PhysicalGeographyType
+    // GEOMETRY and GEOGRAPHY are physically just an opaque chunk of bytes; they differ only
+    // at the logical-type level, so they share a single physical type.
+    case _: GeometryType | _: GeographyType => PhysicalBinaryViewType
     case VariantType => PhysicalVariantType
     case _ => UninitializedPhysicalType
   }
@@ -418,18 +419,19 @@ object UninitializedPhysicalType extends PhysicalDataType {
   @transient private[sql] lazy val tag = typeTag[InternalType]
 }
 
-case class PhysicalGeographyType() extends PhysicalDataType {
-  private[sql] type InternalType = GeographyVal
+// Physical type for opaque, variable-length byte payloads that are addressed as a zero-copy
+// BinaryView into the row backing buffer. Today GEOMETRY and GEOGRAPHY share this physical
+// type; future opaque-bytes logical types can plug into it as well.
+//
+// The physical type defines the natural ordering of its storage: BinaryView has a meaningful
+// unsigned lexicographic compareTo, so we expose it here. Whether a logical type backed by this
+// physical type can actually be ordered is a separate, logical-level decision made by
+// `OrderUtils.isOrderable`, which rejects GEOMETRY / GEOGRAPHY so `ORDER BY <geo_col>` fails at
+// analysis time rather than silently producing a byte-order result.
+class PhysicalBinaryViewType extends PhysicalDataType {
+  private[sql] val ordering = (x: BinaryView, y: BinaryView) => x.compareTo(y)
+  private[sql] type InternalType = BinaryView
   @transient private[sql] lazy val tag = typeTag[InternalType]
-  private[sql] val ordering = implicitly[Ordering[InternalType]]
-}
-
-object PhysicalGeographyType extends PhysicalGeographyType
-
-case class PhysicalGeometryType() extends PhysicalDataType {
-  private[sql] type InternalType = GeometryVal
-  @transient private[sql] lazy val tag = typeTag[InternalType]
-  private[sql] val ordering = implicitly[Ordering[InternalType]]
 }
 
-object PhysicalGeometryType extends PhysicalGeometryType
+case object PhysicalBinaryViewType extends PhysicalBinaryViewType
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 82072443ec0ac..c5e6b5cbce93a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -485,6 +485,47 @@ object DateTimeUtils extends SparkDateTimeUtils {
     instantToMicros(truncated.toInstant)
   }
 
+  /**
+   * Fast path for truncating to MINUTE/HOUR/DAY using offset arithmetic instead of
+   * allocating a `ZonedDateTime` per row. The offset is resolved once for `micros`; the
+   * truncation then runs as `floorMod` in local time. We fall back to [[truncToUnit]] when
+   * the offset at the candidate truncated instant differs from the offset at `micros`,
+   * which means the truncation crosses a DST/historical transition and the local-time
+   * alignment we computed is no longer valid (see SPARK-30766/30857). The check is
+   * skipped for fixed-offset zones. Sub-minute offsets (e.g. America/Los_Angeles LMT
+   * -07:52:58, see SPARK-33404) and 30/45-minute offsets (Asia/Kolkata +05:30, Asia/Kathmandu
+   * +05:45) are handled correctly by this path because the offset is applied as part of
+   * the arithmetic; no offset-alignment guard is needed.
+   *
+   * `unitMicros` must evenly divide `MICROS_PER_DAY`; otherwise wall-clock unit
+   * boundaries do not align to multiples of `unitMicros` in the shifted-local frame
+   * and the `floorMod` truncation is unsafe.
+   */
+  private def truncToUnitFast(
+      micros: Long, zoneId: ZoneId, unitMicros: Long, fallbackUnit: ChronoUnit): Long = {
+    val rules = zoneId.getRules
+    val originalSec = Math.floorDiv(micros, MICROS_PER_SECOND)
+    val originalOffsetSec =
+      rules.getOffset(Instant.ofEpochSecond(originalSec)).getTotalSeconds.toLong
+    val offsetMicros = originalOffsetSec * MICROS_PER_SECOND
+    try {
+      val local = Math.addExact(micros, offsetMicros)
+      val truncatedLocal = Math.subtractExact(local, Math.floorMod(local, unitMicros))
+      val candidate = Math.subtractExact(truncatedLocal, offsetMicros)
+      if (!rules.isFixedOffset) {
+        val candidateSec = Math.floorDiv(candidate, MICROS_PER_SECOND)
+        val candidateOffsetSec =
+          rules.getOffset(Instant.ofEpochSecond(candidateSec)).getTotalSeconds.toLong
+        if (candidateOffsetSec != originalOffsetSec) {
+          return truncToUnit(micros, zoneId, fallbackUnit)
+        }
+      }
+      candidate
+    } catch {
+      case _: ArithmeticException => truncToUnit(micros, zoneId, fallbackUnit)
+    }
+  }
+
   /**
    * Returns the trunc date time from original date time and trunc level.
    * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 9.
@@ -496,12 +537,15 @@ object DateTimeUtils extends SparkDateTimeUtils {
     level match {
       case TRUNC_TO_MICROSECOND => micros
       case TRUNC_TO_MILLISECOND =>
-        micros - Math.floorMod(micros, MICROS_PER_MILLIS)
+        Math.subtractExact(micros, Math.floorMod(micros, MICROS_PER_MILLIS))
       case TRUNC_TO_SECOND =>
-        micros - Math.floorMod(micros, MICROS_PER_SECOND)
-      case TRUNC_TO_MINUTE => truncToUnit(micros, zoneId, ChronoUnit.MINUTES)
-      case TRUNC_TO_HOUR => truncToUnit(micros, zoneId, ChronoUnit.HOURS)
-      case TRUNC_TO_DAY => truncToUnit(micros, zoneId, ChronoUnit.DAYS)
+        Math.subtractExact(micros, Math.floorMod(micros, MICROS_PER_SECOND))
+      case TRUNC_TO_MINUTE =>
+        truncToUnitFast(micros, zoneId, MICROS_PER_MINUTE, ChronoUnit.MINUTES)
+      case TRUNC_TO_HOUR =>
+        truncToUnitFast(micros, zoneId, MICROS_PER_HOUR, ChronoUnit.HOURS)
+      case TRUNC_TO_DAY =>
+        truncToUnitFast(micros, zoneId, MICROS_PER_DAY, ChronoUnit.DAYS)
       case _ => // Try to truncate date levels
         val dDays = microsToDays(micros, zoneId)
         daysToMicros(truncDate(dDays, level), zoneId)
@@ -1059,4 +1103,85 @@ object DateTimeUtils extends SparkDateTimeUtils {
         time, timePrecision, interval, intervalEndField)
     }
   }
+
+  /**
+   * DayTimeInterval bucketing: bucket k starts at
+   * `timestampAddDayTime(originMicros, k * bucketMicros, zoneId)`, matching the instant that
+   * `originMicros + INTERVAL '<k * bucketSize>'` would produce. For sub-day buckets the
+   * calendar-day component is zero, so the result is pure UTC-microsecond floor division
+   * and `zoneId` has no effect.
+   *
+   * `bucketMicros` must be positive; `TimeBucket.checkInputDataTypes` enforces this at
+   * analysis time.
+   *
+   * @param bucketMicros bucket size in microseconds.
+   * @param tsMicros     timestamp to bucket, in microseconds since the epoch (UTC).
+   * @param originMicros grid alignment anchor, in microseconds since the epoch (UTC).
+   * @param zoneId       zone in which calendar-day arithmetic is performed.
+   */
+  def timeBucketDTInterval(
+      bucketMicros: Long, tsMicros: Long, originMicros: Long, zoneId: ZoneId): Long = {
+    val bucketDays = bucketMicros / MICROS_PER_DAY
+
+    val diff = MathUtils.subtractExact(tsMicros, originMicros)
+    var k = MathUtils.floorDiv(diff, bucketMicros)
+
+    if (bucketDays == 0) {
+      val bucketOffset = MathUtils.multiplyExact(k, bucketMicros)
+      MathUtils.addExact(originMicros, bucketOffset)
+    } else {
+      // bucketMicros >= MICROS_PER_DAY, so DST offset shifts (a few hours at most) can
+      // move candidate(k) within one bucket of `origin + k*bucketMicros` but no further.
+      // One +/-1 step recovers the correct k.
+      def candidate(kk: Long): Long =
+        timestampAddDayTime(originMicros, MathUtils.multiplyExact(kk, bucketMicros), zoneId)
+
+      var c = candidate(k)
+      if (c > tsMicros) {
+        k -= 1
+        c = candidate(k)
+      } else {
+        val cNext = candidate(MathUtils.addExact(k, 1L))
+        if (cNext <= tsMicros) c = cNext
+      }
+      c
+    }
+  }
+
+  /**
+   * YearMonthInterval bucketing: bucket k starts at
+   * `originZdt.plusMonths(k * bucketMonths)`, matching the instant that
+   * `originMicros + INTERVAL '<k * bucketMonths>' MONTH` would produce. The offset of
+   * bucket boundaries depends on which side of a DST fold the origin instant resolves to,
+   * mirroring `+ INTERVAL '<n>' MONTH` semantics.
+   *
+   * `bucketMonths` must be positive; `TimeBucket.checkInputDataTypes` enforces this at
+   * analysis time.
+   *
+   * @param bucketMonths bucket size in months.
+   * @param tsMicros     timestamp to bucket, in microseconds since the epoch (UTC).
+   * @param originMicros grid alignment anchor, in microseconds since the epoch (UTC).
+   * @param zoneId       zone in which calendar arithmetic is performed.
+   */
+  def timeBucketYMInterval(
+      bucketMonths: Int, tsMicros: Long, originMicros: Long, zoneId: ZoneId): Long = {
+    val originZdt = microsToInstant(originMicros).atZone(zoneId)
+    val tsZdt = microsToInstant(tsMicros).atZone(zoneId)
+    val rawMonthDiff = (tsZdt.getYear.toLong * 12 + tsZdt.getMonthValue) -
+      (originZdt.getYear.toLong * 12 + originZdt.getMonthValue)
+
+    def candidate(kk: Long): Long = instantToMicros(
+      originZdt.plusMonths(MathUtils.multiplyExact(kk, bucketMonths.toLong)).toInstant)
+
+    var k = MathUtils.floorDiv(rawMonthDiff, bucketMonths.toLong)
+    var c = candidate(k)
+    // candidate(k) may overshoot ts within the same calendar month -- either via
+    // end-of-month capping (Jan 31 -> Feb 28) or because origin's day/time exceeds ts's.
+    // plusMonths is monotonic, so a single step-back suffices.
+    if (c > tsMicros) {
+      k -= 1
+      c = candidate(k)
+    }
+    c
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
index 8d88b05546ed2..793c994fdd43e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project}
 import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.BuiltInFunctionCatalog
-import org.apache.spark.sql.connector.catalog.{CatalogManager, Identifier, TableCatalog, TableCatalogCapability}
+import org.apache.spark.sql.connector.catalog.{DefaultCatalogManager, Identifier, TableCatalog, TableCatalogCapability}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
@@ -205,5 +205,5 @@ object GeneratedColumn {
  * Analyzer for processing generated column expressions using built-in functions only.
  */
 object GeneratedColumnAnalyzer extends Analyzer(
-  new CatalogManager(BuiltInFunctionCatalog, BuiltInFunctionCatalog.v1Catalog)) {
+  new DefaultCatalogManager(BuiltInFunctionCatalog, BuiltInFunctionCatalog.v1Catalog)) {
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
index 808a3d43bf200..5fe4284fc19b2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
@@ -72,8 +72,7 @@ class GenericArrayData(val array: Array[Any]) extends ArrayData {
   override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = getAs(ordinal)
   override def getUTF8String(ordinal: Int): UTF8String = getAs(ordinal)
   override def getBinary(ordinal: Int): Array[Byte] = getAs(ordinal)
-  override def getGeography(ordinal: Int): GeographyVal = getAs(ordinal)
-  override def getGeometry(ordinal: Int): GeometryVal = getAs(ordinal)
+  override def getBinaryView(ordinal: Int): BinaryView = getAs(ordinal)
   override def getInterval(ordinal: Int): CalendarInterval = getAs(ordinal)
   override def getVariant(ordinal: Int): VariantVal = getAs(ordinal)
   override def getStruct(ordinal: Int, numFields: Int): InternalRow = getAs(ordinal)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index 9c077630f33d2..68529e41937e1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, Optimizer}
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
-import org.apache.spark.sql.connector.catalog.{CatalogManager, Column, DefaultValue, FunctionCatalog, Identifier, TableCatalog, TableCatalogCapability}
+import org.apache.spark.sql.connector.catalog.{CatalogManager, Column, DefaultCatalogManager, DefaultValue, FunctionCatalog, Identifier, TableCatalog, TableCatalogCapability}
 import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase}
 import org.apache.spark.sql.internal.SQLConf
@@ -439,7 +439,7 @@ object ResolveDefaultColumns extends QueryErrorsBase
           throw QueryCompilationErrors.defaultValuesDataTypeError(
             statementType, colName, defaultSQL, dataType, other.dataType))
     }
-    if (!conf.charVarcharAsString && CharVarcharUtils.hasCharVarchar(dataType)) {
+    if (!conf.charVarcharAsString && CharVarcharUtils.hasCharVarchar(dataType) && ret.foldable) {
       CharVarcharUtils.stringLengthCheck(ret, dataType).eval(EmptyRow)
     }
     ret
@@ -597,7 +597,7 @@ object ResolveDefaultColumns extends QueryErrorsBase
    * This is an Analyzer for processing default column values using built-in functions only.
    */
   object DefaultColumnAnalyzer extends Analyzer(
-    new CatalogManager(BuiltInFunctionCatalog, BuiltInFunctionCatalog.v1Catalog)) {
+    new DefaultCatalogManager(BuiltInFunctionCatalog, BuiltInFunctionCatalog.v1Catalog)) {
   }
 
   /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
index b89bf7d6c6f43..87c55aaf1eaa6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
@@ -279,139 +279,68 @@ object StringUtils extends Logging {
     pattern.matches(cleanText)
   }
 
-  private def containsNonWhiteSpaceCharacters(inputString: String): Boolean = {
-    val pattern = "\\S".r
-    pattern.findFirstIn(inputString).isDefined
-  }
-
-  // Implementation is grabbed from SparkSQLCLIDriver.splitSemiColon, the only difference is this
-  // implementation handles backtick and treat it as single/double quote.
-  // Below comments are from the source:
-  // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
-  // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
-  // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
-  // hence we refined this function a little bit.
-  // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
+  // Structural scanner for splitting SQL by semicolons.
+  // Handles quoted strings with escapes, line comments (--), and nested block comments (/* */).
+  // Semicolons inside strings or comments are not treated as delimiters.
+  // Note: [SPARK-31595], [SPARK-33100], [SPARK-54876]
   def splitSemiColonWithIndex(line: String, enableSqlScripting: Boolean): List[String] = {
-    var insideSingleQuote = false
-    var insideDoubleQuote = false
-    var insideBacktick = false
-    var insideSimpleComment = false
-    var bracketedCommentLevel = 0
-    var escape = false
-    var beginIndex = 0
-    var leavingBracketedComment = false
-    var hasPrecedingNonCommentString = false
-    var isStatement = false
-    val ret = new ArrayBuffer[String]()
-
     lazy val insideSqlScript: Boolean = isSqlScript(line)
+    if (enableSqlScripting && insideSqlScript) return List(line)
 
-    def insideBracketedComment: Boolean = bracketedCommentLevel > 0
-    def insideComment: Boolean = insideSimpleComment || insideBracketedComment
-    def statementInProgress(index: Int): Boolean =
-      isStatement || (!insideComment &&
-        index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
-
-    for (index <- 0 until line.length) {
-      // Checks if we need to decrement a bracketed comment level; the last character '/' of
-      // bracketed comments is still inside the comment, so `insideBracketedComment` must keep
-      // true in the previous loop and we decrement the level here if needed.
-      if (leavingBracketedComment) {
-        bracketedCommentLevel -= 1
-        leavingBracketedComment = false
+    val ret = new ArrayBuffer[String]()
+    val n = line.length
+    var i = 0
+    var chunkStart = 0
+    var chunkHasSql = false
+    var chunkHasUnclosed = false
+
+    def consumeString(start: Int, quote: Char): Int = {
+      var p = start + 1
+      while (p < n) {
+        val c = line.charAt(p)
+        if (c == '\\' && p + 1 < n) p += 2
+        else if (c == quote) return p + 1
+        else p += 1
       }
+      chunkHasUnclosed = true; n
+    }
 
-      if (line.charAt(index) == '\'' && !insideComment) {
-        // take a look to see if it is escaped
-        // See the comment above about SPARK-31595
-        if (!escape && !insideDoubleQuote && !insideBacktick) {
-          // flip the boolean variable
-          insideSingleQuote = !insideSingleQuote
-        }
-      } else if (line.charAt(index) == '\"' && !insideComment) {
-        // take a look to see if it is escaped
-        // See the comment above about SPARK-31595
-        if (!escape && !insideSingleQuote && !insideBacktick) {
-          // flip the boolean variable
-          insideDoubleQuote = !insideDoubleQuote
-        }
-      } else if (line.charAt(index) == '`' && !insideComment) {
-        // take a look to see if it is escaped
-        // See the comment above about SPARK-31595
-        if (!escape && !insideSingleQuote && !insideDoubleQuote) {
-          // flip the boolean variable
-          insideBacktick = !insideBacktick
-        }
-      } else if (line.charAt(index) == '-') {
-        val hasNext = index + 1 < line.length
-        if (insideDoubleQuote || insideSingleQuote || insideBacktick || insideComment) {
-          // Ignores '-' in any case of quotes or comment.
-          // Avoids to start a comment(--) within a quoted segment or already in a comment.
-          // Sample query: select "quoted value --"
-          //                                    ^^ avoids starting a comment if inside quotes.
-        } else if (hasNext && line.charAt(index + 1) == '-') {
-          // ignore quotes and ; in simple comment
-          insideSimpleComment = true
-        }
-      } else if (line.charAt(index) == ';') {
-        if (insideSingleQuote || insideDoubleQuote || insideBacktick || insideComment) {
-          // do not split
-        } else if (enableSqlScripting && insideSqlScript) {
-          // do not split
-        } else {
-          if (isStatement) {
-            // split, do not include ; itself
-            ret.append(line.substring(beginIndex, index))
-          }
-          beginIndex = index + 1
-          isStatement = false
-        }
-      } else if (line.charAt(index) == '\n') {
-        // with a new line the inline simple comment should end.
-        if (!escape) {
-          insideSimpleComment = false
-        }
-      } else if (line.charAt(index) == '/' && !insideSimpleComment) {
-        val hasNext = index + 1 < line.length
-        if (insideSingleQuote || insideDoubleQuote || insideBacktick) {
-          // Ignores '/' in any case of quotes
-        } else if (insideBracketedComment && line.charAt(index - 1) == '*') {
-          // Decrements `bracketedCommentLevel` at the beginning of the next loop
-          leavingBracketedComment = true
-        } else if (hasNext && line.charAt(index + 1) == '*') {
-          bracketedCommentLevel += 1
-          // Check if there's non-comment characters(non space, non newline characters) before
-          // multiline comments.
-          hasPrecedingNonCommentString = beginIndex != index && containsNonWhiteSpaceCharacters(
-            line.substring(beginIndex, index)
-          )
-        }
-      }
-      // set the escape
-      if (escape) {
-        escape = false
-      } else if (line.charAt(index) == '\\') {
-        escape = true
-      }
+    def consumeLineComment(start: Int): Int = {
+      var p = start + 2
+      while (p < n && line.charAt(p) != '\n') p += 1
+      p
+    }
 
-      isStatement = statementInProgress(index)
+    def consumeBlockComment(start: Int): Int = {
+      var p = start + 2
+      var level = 1
+      while (p + 1 < n && level > 0) {
+        val c0 = line.charAt(p); val c1 = line.charAt(p + 1)
+        if (c0 == '/' && c1 == '*') { level += 1; p += 2 }
+        else if (c0 == '*' && c1 == '/') { level -= 1; p += 2 }
+        else p += 1
+      }
+      if (level > 0) { chunkHasUnclosed = true; n } else p
     }
-    // Check the last char is end of nested bracketed comment.
-    val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 &&
-      !hasPrecedingNonCommentString
-    // Spark SQL support simple comment and nested bracketed comment in query body.
-    // But if Spark SQL receives a comment alone, it will throw parser exception.
-    // In Spark SQL CLI, if there is a completed comment in the end of whole query,
-    // since Spark SQL CLL use `;` to split the query, CLI will pass the comment
-    // to the backend engine and throw exception. CLI should ignore this comment,
-    // If there is an uncompleted statement or an uncompleted bracketed comment in the end,
-    // CLI should also pass this part to the backend engine, which may throw an exception
-    // with clear error message (for incomplete statement, if there's non comment characters,
-    // we would still append the string).
-    if (!endOfBracketedComment && (isStatement || insideBracketedComment)) {
-      ret.append(line.substring(beginIndex))
+
+    while (i < n) {
+      val c = line.charAt(i)
+      def peek(ch: Char): Boolean = i + 1 < n && line.charAt(i + 1) == ch
+      if (c == '\'' || c == '"' || c == '`') {
+        chunkHasSql = true; i = consumeString(i, c)
+      } else if (c == '-' && peek('-')) {
+        i = consumeLineComment(i)
+      } else if (c == '/' && peek('*')) {
+        i = consumeBlockComment(i)
+      } else if (c == ';') {
+        if (chunkHasSql) ret += line.substring(chunkStart, i)
+        chunkStart = i + 1; chunkHasSql = false; chunkHasUnclosed = false; i += 1
+      } else {
+        if (!Character.isWhitespace(c)) chunkHasSql = true
+        i += 1
+      }
     }
+    if (chunkHasSql || chunkHasUnclosed) ret += line.substring(chunkStart)
     ret.toList
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
index 67423e1b50d3f..2e7e88633bfa0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
@@ -187,10 +187,10 @@ package object util extends Logging {
   val QUALIFIED_ACCESS_ONLY = "__qualified_access_only"
 
   /**
-   * If set, this metadata column can only be accessed under [[AggregateExpression]]. This is
-   * important when resolving columns in ORDER BY and HAVING clauses on top of [[Aggregate]].
-   * In this case we can only reference attributes from grouping expressions, or attributes marked
-   * as "__aggregated_access_only" under [[AggregateExpression]].
+   * If set, this column can only be accessed under [[AggregateExpression]]. This is important when
+   * resolving columns in ORDER BY and HAVING clauses on top of [[Aggregate]]. In this case we can
+   * only reference attributes from grouping expressions, or attributes marked as
+   * "__aggregated_access_only" under [[AggregateExpression]].
    */
   val AGGREGATED_ACCESS_ONLY = "__aggregated_access_only"
 
@@ -202,8 +202,7 @@ package object util extends Logging {
       attr.metadata.contains(QUALIFIED_ACCESS_ONLY) &&
       attr.metadata.getBoolean(QUALIFIED_ACCESS_ONLY)
 
-    def aggregatedAccessOnly: Boolean = attr.isMetadataCol &&
-      attr.metadata.contains(AGGREGATED_ACCESS_ONLY) &&
+    def aggregatedAccessOnly: Boolean = attr.metadata.contains(AGGREGATED_ACCESS_ONLY) &&
       attr.metadata.getBoolean(AGGREGATED_ACCESS_ONLY)
 
     def markAsQualifiedAccessOnly(): Attribute = attr.withMetadata(
@@ -217,7 +216,6 @@ package object util extends Logging {
     def markAsAggregatedAccessOnly(): Attribute = attr.withMetadata(
       new MetadataBuilder()
         .withMetadata(attr.metadata)
-        .putString(METADATA_COL_ATTR_KEY, attr.name)
         .putBoolean(AGGREGATED_ACCESS_ONLY, true)
         .build()
     )
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
index 7df836cea6124..4dd8af5eb37ef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
@@ -17,39 +17,117 @@
 
 package org.apache.spark.sql.connector.catalog
 
+import java.util.concurrent.atomic.AtomicReference
+
 import scala.collection.mutable
+import scala.util.Try
 
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.SQLConfHelper
 import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, TempVariableManager}
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog.SessionFunctionKind
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.util.StringUtils
+import org.apache.spark.sql.connector.catalog.CatalogManager.SessionPathEntry
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.internal.SQLConf
 
 /**
- * A thread-safe manager for [[CatalogPlugin]]s. It tracks all the registered catalogs, and allow
- * the caller to look up a catalog by name.
+ * A thread-safe contract for managing [[CatalogPlugin]]s. Implementations resolve catalogs by
+ * name and maintain the current catalog and namespace for a session.
  *
  * There are still many commands (e.g. ANALYZE TABLE) that do not support v2 catalog API. They
  * ignore the current catalog and blindly go to the v1 `SessionCatalog`. To avoid tracking current
- * namespace in both `SessionCatalog` and `CatalogManger`, we let `CatalogManager` to set/get
+ * namespace in both `SessionCatalog` and `CatalogManager`, implementations set/get the
  * current database of `SessionCatalog` when the current catalog is the session catalog.
+ *
+ * Two implementations exist: [[DefaultCatalogManager]] owns the mutable session state;
+ * [[TransactionAwareCatalogManager]] wraps another manager and redirects catalog lookups to the
+ * active transaction's catalog.
  */
 // TODO: all commands should look up table from the current catalog. The `SessionCatalog` doesn't
 //       need to track current database at all.
-private[sql]
-class CatalogManager(
-    defaultSessionCatalog: CatalogPlugin,
-    val v1SessionCatalog: SessionCatalog) extends SQLConfHelper with Logging {
+private[sql] trait CatalogManager extends SQLConfHelper with Logging {
+
+  // ---- Underlying state exposed by implementations ----
+  def defaultSessionCatalog: CatalogPlugin
+  def v1SessionCatalog: SessionCatalog
+  def tempVariableManager: TempVariableManager
+
+  // ---- Catalog access ----
+  def catalog(name: String): CatalogPlugin
+  private[sql] def v2SessionCatalog: CatalogPlugin
+  def listCatalogs(pattern: Option[String]): Seq[String]
+  def currentCatalog: CatalogPlugin
+  def setCurrentCatalog(catalogName: String): Unit
+  def isCatalogRegistered(name: String): Boolean = {
+    try {
+      catalog(name)
+      true
+    } catch {
+      case _: CatalogNotFoundException => false
+    }
+  }
+
+  // ---- Transactions ----
+  def transaction: Option[Transaction] = None
+
+  def withTransaction(transaction: Transaction): CatalogManager
+
+  // ---- Namespace ----
+  def currentNamespace: Array[String]
+  def setCurrentNamespace(namespace: Array[String]): Unit
+
+  // ---- Session path ----
+  def sessionPathEntries: Option[Seq[SessionPathEntry]]
+  def storedSessionPathEntries: Option[Seq[SessionPathEntry]]
+  def confDefaultPathEntries: Option[Seq[SessionPathEntry]]
+  def setSessionPath(entries: Seq[SessionPathEntry]): Unit
+  def clearSessionPath(): Unit
+  private[sql] def copySessionPathFrom(other: CatalogManager): Unit
+  def currentPathString: String
+  def sqlResolutionPathEntries(
+      pathDefaultCatalog: String,
+      pathDefaultNamespace: Seq[String],
+      expandCatalog: String,
+      expandNamespace: Seq[String]): Seq[Seq[String]]
+  def sqlResolutionPathEntries(
+      currentCatalog: String,
+      currentNamespace: Seq[String]): Seq[Seq[String]]
+  def isSystemSessionOnPath: Boolean
+  def resolutionPathEntriesForAnalysis(
+      pinnedEntries: Option[Seq[Seq[String]]],
+      viewCatalogAndNamespace: Seq[String]): Seq[Seq[String]]
+  def sessionFunctionKindsForUnqualifiedResolution(): Seq[SessionFunctionKind]
+
+  // Reset the manager to its initial state. Only used in tests.
+  private[sql] def reset(): Unit
+}
+
+/**
+ * Default [[CatalogManager]] implementation. Owns the mutable session state
+ * (registered catalogs, current catalog/namespace, session path).
+ */
+private[sql] class DefaultCatalogManager(
+    override val defaultSessionCatalog: CatalogPlugin,
+    override val v1SessionCatalog: SessionCatalog) extends CatalogManager {
   import CatalogManager.SESSION_CATALOG_NAME
   import CatalogV2Util._
 
   private val catalogs = mutable.HashMap.empty[String, CatalogPlugin]
 
   // TODO: create a real SYSTEM catalog to host `TempVariableManager` under the SESSION namespace.
-  val tempVariableManager: TempVariableManager = new TempVariableManager
+  override val tempVariableManager: TempVariableManager = new TempVariableManager
 
-  def catalog(name: String): CatalogPlugin = synchronized {
+  // Wire `SessionCatalog`'s fast-path kinds to the live SQL PATH. The kinds list itself is
+  // pure data conversion (system entries from the path, in path order); the *decision* to use
+  // path-order kinds for unqualified lookups lives at the Strategy layer (see callers of
+  // [[CatalogManager.systemFunctionKindsFromPath]]).
+  v1SessionCatalog.bindCatalogManagerForSessionFunctionKinds(this)
+
+  override def catalog(name: String): CatalogPlugin = synchronized {
     if (name.equalsIgnoreCase(SESSION_CATALOG_NAME)) {
       v2SessionCatalog
     } else {
@@ -57,15 +135,6 @@ class CatalogManager(
     }
   }
 
-  def isCatalogRegistered(name: String): Boolean = {
-    try {
-      catalog(name)
-      true
-    } catch {
-      case _: CatalogNotFoundException => false
-    }
-  }
-
   private def loadV2SessionCatalog(): CatalogPlugin = {
     Catalogs.load(SESSION_CATALOG_NAME, conf) match {
       case extension: CatalogExtension =>
@@ -84,16 +153,19 @@ class CatalogManager(
    * This happens when the source implementation extends the v2 TableProvider API and is not listed
    * in the fallback configuration, spark.sql.sources.useV1SourceList
    */
-  private[sql] def v2SessionCatalog: CatalogPlugin = {
+  override private[sql] def v2SessionCatalog: CatalogPlugin = {
     conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) match {
       case "builtin" => defaultSessionCatalog
       case _ => catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog())
     }
   }
 
+  override def withTransaction(transaction: Transaction): CatalogManager =
+    new TransactionAwareCatalogManager(this, transaction)
+
   private var _currentNamespace: Option[Array[String]] = None
 
-  def currentNamespace: Array[String] = {
+  override def currentNamespace: Array[String] = {
     val defaultNamespace = if (currentCatalog.name() == SESSION_CATALOG_NAME) {
       Array(v1SessionCatalog.getCurrentDatabase)
     } else {
@@ -115,45 +187,139 @@ class CatalogManager(
     }
   }
 
-  def setCurrentNamespace(namespace: Array[String]): Unit = synchronized {
-    if (isSessionCatalog(currentCatalog) && namespace.length == 1) {
+  override def setCurrentNamespace(namespace: Array[String]): Unit = {
+    // SPARK-56939: do NOT hold [[CatalogManager]]'s intrinsic lock across the callbacks below.
+    // [[v1SessionCatalog.setCurrentDatabaseWithNameCheck]] briefly synchronizes on
+    // [[SessionCatalog]], and concurrent unqualified function resolution acquires the
+    // [[SessionCatalog]] lock and then reaches into [[CatalogManager]] via
+    // [[sqlResolutionPathEntries]]; nesting the manager lock outside the catalog lock here
+    // would invert that order and deadlock. Snapshot the dispatch decision under the lock,
+    // run callbacks outside it, then publish the new namespace under the lock again.
+    //
+    // Concurrency trade-offs versus the pre-SPARK-56939 atomic version (v1-side and
+    // CM-side drift modes):
+    //
+    // (a) v1-side drift. The `isSession` snapshot can drift if a concurrent
+    //     [[setCurrentCatalog]] switches to a v2 catalog between this read and the v1
+    //     callback below -- the callback would still touch `v1.currentDb` even though
+    //     the active catalog is no longer the session catalog. A later switch back to
+    //     the session catalog resets `v1.currentDb` to `default` (see
+    //     [[setCurrentCatalog]]), so long-term state remains consistent; only the
+    //     intermediate observation is novel.
+    //
+    // (b) CM-side publish-overwrite drift (sticky). Between the v1 callback returning
+    //     and the publish below, a concurrent [[setCurrentCatalog]] can complete fully
+    //     -- switching `_currentCatalogName` to (say) a v2 catalog and clearing
+    //     `_currentNamespace = None` -- before this method's publish overwrites that
+    //     with `Some(namespace)`. End state: `_currentNamespace = Some(namespace)` is
+    //     published under a different `_currentCatalogName` than the one observed when
+    //     [[isSession]] was snapshotted at the top. Unlike (a) there is no analogous
+    //     auto-recovery; the mismatch sticks until the next `USE`. This is still
+    //     last-writer-wins for two racing `USE` commands, which is the conventional
+    //     expectation, so it is accepted as a trade-off against the deadlock alternative.
+    val isSession = synchronized(isSessionCatalog(currentCatalog))
+    if (isSession && namespace.length == 1) {
       v1SessionCatalog.setCurrentDatabaseWithNameCheck(
         namespace.head,
         _ => assertNamespaceExist(namespace))
     } else {
       assertNamespaceExist(namespace)
     }
-    _currentNamespace = Some(namespace)
+    synchronized {
+      _currentNamespace = Some(namespace)
+    }
   }
 
-  import CatalogManager.SessionPathEntry
-
   private var _sessionPath: Option[Seq[SessionPathEntry]] = None
 
-  /** Returns the raw stored session path entries, or None if no path is set. */
-  def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized { _sessionPath }
+  /**
+   * Cache for [[confDefaultPathEntries]]: stores the expanded [[SessionPathEntry]] list keyed
+   * on the trimmed [[SQLConf#DEFAULT_PATH]] string and
+   * [[SQLConf#SESSION_FUNCTION_RESOLUTION_ORDER]] value (the only conf that affects the
+   * expansion of `DEFAULT_PATH` / `SYSTEM_PATH` tokens).
+   * `CurrentSchemaEntry` markers are preserved unresolved so the cache stays valid across
+   * `USE SCHEMA`.
+   */
+  private val confDefaultPathCache =
+    new AtomicReference[Option[(String, String, Seq[SessionPathEntry])]](None)
+
+  /**
+   * Returns the effective session path entries: the explicit `SET PATH` value if stored,
+   * else the parsed [[SQLConf#DEFAULT_PATH]] conf if non-empty (mirroring how
+   * [[currentCatalog]] falls back to [[SQLConf#DEFAULT_CATALOG]]). Returns `None` when
+   * [[SQLConf#PATH_ENABLED]] is false or both sources are empty.
+   */
+  override def sessionPathEntries: Option[Seq[SessionPathEntry]] = synchronized {
+    if (!conf.pathEnabled) None
+    else _sessionPath.orElse(confDefaultPathEntries)
+  }
+
+  /** Raw `_sessionPath` (post-`SET PATH`), without the [[SQLConf#DEFAULT_PATH]] fallback. */
+  override def storedSessionPathEntries: Option[Seq[SessionPathEntry]] =
+    synchronized { _sessionPath }
+
+  /**
+   * Parsed and expanded [[SQLConf#DEFAULT_PATH]] value, or `None` when the conf is empty.
+   * Reuses the SET PATH grammar via
+   * [[org.apache.spark.sql.catalyst.parser.AbstractSqlParser#parsePathElements]] (via
+   * [[org.apache.spark.sql.catalyst.parser.CatalystSqlParser]]). An inner
+   * `DEFAULT_PATH` token resolves to the spark-builtin default ordering (cycle break).
+   *
+   * Unlike `SET PATH`, this does NOT run a duplicate check: lookup uses first-match
+   * resolution, so any redundant entry (including ones that only collide after a later
+   * `USE SCHEMA`) is dead code rather than an error. Cached so the hot path is a single
+   * atomic load on conf-stable sessions.
+   */
+  override def confDefaultPathEntries: Option[Seq[SessionPathEntry]] = {
+    val confValue = conf.defaultPath
+    if (confValue == null || confValue.trim.isEmpty) {
+      confDefaultPathCache.set(None)
+      None
+    } else {
+      val trimmed = confValue.trim
+      val sessionOrder = conf.sessionFunctionResolutionOrder
+      val expanded = confDefaultPathCache.get() match {
+        case Some((k, ord, cached)) if k == trimmed && ord == sessionOrder => cached
+        case _ =>
+          val elements = CatalystSqlParser.parsePathElements(trimmed)
+          val computed = PathElement.expand(elements, conf, this, isConfDefaultExpansion = true)
+          confDefaultPathCache.set(Some((trimmed, sessionOrder, computed)))
+          computed
+      }
+      if (expanded.isEmpty) None else Some(expanded)
+    }
+  }
 
-  def setSessionPath(entries: Seq[SessionPathEntry]): Unit = synchronized {
+  override def setSessionPath(entries: Seq[SessionPathEntry]): Unit = synchronized {
     _sessionPath = Some(entries)
   }
 
-  def clearSessionPath(): Unit = synchronized {
+  override def clearSessionPath(): Unit = synchronized {
     _sessionPath = None
   }
 
-  private[sql] def copySessionPathFrom(other: CatalogManager): Unit = synchronized {
-    _sessionPath = other.sessionPathEntries
+  override private[sql] def copySessionPathFrom(other: CatalogManager): Unit = synchronized {
+    _sessionPath = other.storedSessionPathEntries
   }
 
   /**
    * String form of the current resolution path for CURRENT_PATH().
-   * When PATH is enabled and a session path is stored, formats the effective path entries
-   * with markers expanded. Otherwise falls back to the legacy resolutionSearchPath.
+   * When PATH is enabled and a session path is in effect (stored or via
+   * [[SQLConf#DEFAULT_PATH]]), formats the resolved entries. Otherwise falls back to the legacy
+   * resolutionSearchPath.
+   *
+   * SPARK-56939 note: this is currently the only intentional `CatalogManager.synchronized ->
+   * SessionCatalog.synchronized` nest left in this class. The transitive call into
+   * [[v1SessionCatalog.getCurrentDatabase]] happens via [[currentNamespace]], which fetches
+   * the v1 current database under the CM lock. It is safe today because no code path holds
+   * [[SessionCatalog]]'s intrinsic lock while waiting on [[CatalogManager]]'s -- the
+   * SPARK-56939 fix removed every such SC->CM ordering. Any future change that introduces a
+   * new SC->CM ordering must take `currentPathString` (or any other CM->SC nest) into
+   * account to avoid resurrecting the deadlock.
    */
-  def currentPathString: String = synchronized {
+  override def currentPathString: String = synchronized {
     import CatalogV2Implicits._
-    val stored = if (conf.pathEnabled) _sessionPath else None
-    stored match {
+    sessionPathEntries match {
       case Some(entries) =>
         val resolved = CatalogManager.resolvePathEntries(
           entries, currentCatalog.name(), currentNamespace.toSeq)
@@ -164,40 +330,206 @@ class CatalogManager(
     }
   }
 
+  /**
+   * Ordered catalog/schema path entries for resolving unqualified SQL object names.
+   * When PATH is off or unset, applies [[SQLConf#defaultPathOrder]] (legacy).
+   * When PATH is in effect (stored or via the [[SQLConf#DEFAULT_PATH]] conf), uses the
+   * resolved entries.
+   */
+  override def sqlResolutionPathEntries(
+      pathDefaultCatalog: String,
+      pathDefaultNamespace: Seq[String],
+      expandCatalog: String,
+      expandNamespace: Seq[String]): Seq[Seq[String]] = synchronized {
+    val defaultEntry =
+      if (pathDefaultNamespace.isEmpty) Seq(pathDefaultCatalog)
+      else pathDefaultCatalog +: pathDefaultNamespace
+    sessionPathEntries match {
+      case Some(entries) =>
+        CatalogManager.resolvePathEntries(entries, expandCatalog, expandNamespace)
+      case None =>
+        conf.defaultPathOrder(Seq(defaultEntry))
+    }
+  }
+
+  /** Session-catalog overload. */
+  override def sqlResolutionPathEntries(
+      currentCatalog: String,
+      currentNamespace: Seq[String]): Seq[Seq[String]] =
+    sqlResolutionPathEntries(
+      currentCatalog, currentNamespace,
+      currentCatalog, currentNamespace)
+
+  /**
+   * Snapshot the live PATH-derived [[SessionCatalog.SessionFunctionKind]] order used by
+   * unqualified function/table-function resolution.
+   *
+   * The `(currentCatalog, _currentNamespace, sessionPath)` triple is read together inside a
+   * single CM critical section so a concurrent `USE` / `SET PATH` cannot return a torn
+   * snapshot for those three fields (e.g. catalog from one observation, explicit namespace
+   * from another).
+   *
+   * The `v1SessionCatalog.getCurrentDatabase` read needed for the default-namespace fallback
+   * is taken OUTSIDE the CM lock and is therefore intentionally racy w.r.t. a concurrent
+   * `USE SCHEMA`. That staleness is harmless for this helper's output: this method consumes
+   * `effectiveNs` only to expand `CURRENT_SCHEMA` markers in the SQL path, and
+   * [[CatalogManager.systemFunctionKindsFromPath]] only retains literal `system.builtin` /
+   * `system.session` entries from the resolved path -- it never inspects any
+   * `(catalog, namespace)` derived from `v1`. So if `v1CurrentDb` lags by one `USE SCHEMA`,
+   * a `CURRENT_SCHEMA` entry might briefly resolve to the previous database, but the kinds
+   * list (the only thing returned here) is unaffected. Moving the read inside the CM lock
+   * would re-introduce the SPARK-56939 lock-order inversion this helper exists to avoid.
+   *
+   * Callers (e.g. [[SessionCatalog.sessionFunctionKindsInResolutionOrder]],
+   * [[org.apache.spark.sql.catalyst.analysis.FunctionResolution.isSessionBeforeBuiltinInPath]])
+   * MUST NOT hold [[SessionCatalog]]'s intrinsic lock when invoking this method.
+   */
+  override def sessionFunctionKindsForUnqualifiedResolution(): Seq[SessionFunctionKind] = {
+    // SPARK-56939: read v1's current database before taking the CM lock; see the method
+    // doc for why the resulting staleness is harmless for the kinds list.
+    val v1CurrentDb = v1SessionCatalog.getCurrentDatabase
+    val pathEntries = synchronized {
+      val catName = currentCatalog.name()
+      val effectiveNs: Seq[String] = _currentNamespace.map(_.toSeq).getOrElse {
+        if (catName == SESSION_CATALOG_NAME) {
+          Seq(v1CurrentDb)
+        } else {
+          currentCatalog.defaultNamespace().toSeq
+        }
+      }
+      sqlResolutionPathEntries(catName, effectiveNs)
+    }
+    CatalogManager.systemFunctionKindsFromPath(pathEntries)
+  }
+
+  /**
+   * True if `system.session` is on the SQL path. Only literal path entries can match: the
+   * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]] marker expands to
+   * `currentCatalog.name() +: currentNamespace`, and
+   * `system` is not a registered catalog (it is a synthetic namespace served via
+   * [[org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog]] / `lookupBuiltinOrTempFunction`,
+   * not loadable via [[catalog]]), so `currentCatalog.name()` cannot be `"system"`. If that
+   * invariant ever changes, this short-circuit must be revisited.
+   * Inspecting effective entries directly avoids loading the configured default catalog.
+   */
+  override def isSystemSessionOnPath: Boolean = synchronized {
+    if (!conf.pathEnabled) return true
+    sessionPathEntries match {
+      case None => true
+      case Some(entries) => entries.exists {
+        case CatalogManager.LiteralPathEntry(parts) =>
+          CatalogManager.isSystemSessionPathEntry(parts)
+        case _ => false
+      }
+    }
+  }
+
+  /**
+   * Single source of truth for analysis-time resolution path entries used by relation, routine,
+   * and procedure resolution. When `pinnedEntries` are set (a view or SQL function body's
+   * persisted frozen path) and PATH is enabled, returns them as-is so unqualified lookups follow
+   * the creation-time path. Otherwise falls back to [[sqlResolutionPathEntries]] using the view's
+   * catalog/namespace as the path default (so unqualified names inside a view body see the view's
+   * home schema first), while always expanding markers like CURRENT_SCHEMA against the live
+   * session catalog/namespace.
+   *
+   * @param pinnedEntries persisted frozen path entries from view / SQL function metadata
+   *                      (typically `AnalysisContext.resolutionPathEntries`).
+   * @param viewCatalogAndNamespace the view's catalog and namespace
+   *                               (typically `AnalysisContext.catalogAndNamespace`); empty when
+   *                               not resolving a view body.
+   */
+  override def resolutionPathEntriesForAnalysis(
+      pinnedEntries: Option[Seq[Seq[String]]],
+      viewCatalogAndNamespace: Seq[String]): Seq[Seq[String]] = {
+    pinnedEntries match {
+      case Some(entries) if conf.pathEnabled => entries
+      case _ =>
+        val expandCatalog = currentCatalog.name()
+        val expandNamespace = currentNamespace.toSeq
+        val (pathCatalog, pathNamespace) =
+          if (viewCatalogAndNamespace.nonEmpty) {
+            (viewCatalogAndNamespace.head, viewCatalogAndNamespace.tail.toSeq)
+          } else {
+            (expandCatalog, expandNamespace)
+          }
+        sqlResolutionPathEntries(
+          pathCatalog,
+          pathNamespace,
+          expandCatalog,
+          expandNamespace)
+    }
+  }
+
   private var _currentCatalogName: Option[String] = None
 
-  def currentCatalog: CatalogPlugin = synchronized {
+  override def currentCatalog: CatalogPlugin = synchronized {
     catalog(_currentCatalogName.getOrElse(conf.getConf(SQLConf.DEFAULT_CATALOG)))
   }
 
-  def setCurrentCatalog(catalogName: String): Unit = synchronized {
-    // `setCurrentCatalog` is noop if it doesn't switch to a different catalog.
-    if (currentCatalog.name() != catalogName) {
-      catalog(catalogName)
-      _currentCatalogName = Some(catalogName)
-      _currentNamespace = None
+  override def setCurrentCatalog(catalogName: String): Unit = {
+    // SPARK-56939: see [[setCurrentNamespace]]. Avoid nesting [[CatalogManager]]'s lock
+    // across [[v1SessionCatalog.setCurrentDatabase]] (which synchronizes on
+    // [[SessionCatalog]]) to prevent a lock-order inversion with concurrent unqualified
+    // function resolution.
+    val needsSwitch = synchronized {
+      // `setCurrentCatalog` is noop if it doesn't switch to a different catalog.
+      if (currentCatalog.name() != catalogName) {
+        // Force-load the named catalog while holding the manager lock to keep the
+        // not-found error semantics; if loading fails, throw before mutating state.
+        catalog(catalogName)
+        true
+      } else {
+        false
+      }
+    }
+    if (needsSwitch) {
       // Reset the current database of v1 `SessionCatalog` when switching current catalog, so that
       // when we switch back to session catalog, the current namespace definitely is ["default"].
+      // Run this BEFORE publishing the new catalog name so that if a reader observes the new
+      // catalog, the v1 state is already consistent with it.
+      //
+      // Concurrency trade-off versus the pre-SPARK-56939 atomic version: between this v1 write
+      // and the publish below, a concurrent reader of `currentNamespace` sees
+      // `(oldCatalog, v1.currentDb = default)`. When the old catalog is the session catalog
+      // (the common case for `USE CATALOG`), the user's previous namespace is briefly invisible
+      // to that reader until the new name is published. The opposite torn observation
+      // (`newCatalog`, stale `v1.currentDb`) is avoided by this ordering. This trade-off
+      // (transient invisibility instead of transient inconsistency, exchanged for breaking the
+      // deadlock cycle) is accepted; the long-term post-switch state is the same as before.
       v1SessionCatalog.setCurrentDatabase(conf.defaultDatabase)
+      synchronized {
+        _currentCatalogName = Some(catalogName)
+        _currentNamespace = None
+      }
     }
   }
 
-  def listCatalogs(pattern: Option[String]): Seq[String] = {
+  override def listCatalogs(pattern: Option[String]): Seq[String] = {
     val allCatalogs = (synchronized(catalogs.keys.toSeq) :+ SESSION_CATALOG_NAME).distinct.sorted
     pattern.map(StringUtils.filterPattern(allCatalogs, _)).getOrElse(allCatalogs)
   }
 
   // Clear all the registered catalogs. Only used in tests.
-  private[sql] def reset(): Unit = synchronized {
-    catalogs.clear()
-    _currentNamespace = None
-    _currentCatalogName = None
-    _sessionPath = None
+  //
+  // SPARK-56939: apply the same split-lock pattern as [[setCurrentNamespace]] /
+  // [[setCurrentCatalog]] so the locking contract is uniform across every CM mutator that
+  // calls back into [[v1SessionCatalog]]. Test-only callers don't race against unqualified
+  // function resolution today, but keeping the contract symmetric prevents future test
+  // helpers (e.g. session reset in a concurrent harness) from reintroducing the cycle.
+  override private[sql] def reset(): Unit = {
+    synchronized {
+      catalogs.clear()
+      _currentNamespace = None
+      _currentCatalogName = None
+      _sessionPath = None
+      confDefaultPathCache.set(None)
+    }
     v1SessionCatalog.setCurrentDatabase(conf.defaultDatabase)
   }
 }
 
-private[sql] object CatalogManager {
+private[sql] object CatalogManager extends Logging {
 
   val SESSION_CATALOG_NAME: String = "spark_catalog"
   val SYSTEM_CATALOG_NAME = "system"
@@ -237,16 +569,37 @@ private[sql] object CatalogManager {
   /**
    * True if the multipart name uses the session temp view namespace: two-part `session.view`
    * or three-part `system.session.view`. The two-part form can also denote a persistent relation
-   * in schema `session`; resolution order is controlled by [[SQLConf.prioritizeSystemCatalog]].
+   * in schema `session`; resolution order is controlled by [[SQLConf#prioritizeSystemCatalog]].
    */
   def isSessionQualifiedViewName(nameParts: Seq[String]): Boolean = {
     (nameParts.length == 2 && nameParts.head.equalsIgnoreCase(SESSION_NAMESPACE)) ||
       isFullyQualifiedSystemSessionViewName(nameParts)
   }
 
-  /** True if a SQL path entry is the well-known `system.session` entry. */
+  /** True if a SQL path entry is the well-known `system.session` entry (case-insensitive). */
   def isSystemSessionPathEntry(parts: Seq[String]): Boolean =
-    parts == Seq(SYSTEM_CATALOG_NAME, SESSION_NAMESPACE)
+    parts.length == 2 &&
+      parts.head.equalsIgnoreCase(SYSTEM_CATALOG_NAME) &&
+      parts(1).equalsIgnoreCase(SESSION_NAMESPACE)
+
+  /** True if a SQL path entry is the well-known `system.builtin` entry (case-insensitive). */
+  def isSystemBuiltinPathEntry(parts: Seq[String]): Boolean =
+    parts.length == 2 &&
+      parts.head.equalsIgnoreCase(SYSTEM_CATALOG_NAME) &&
+      parts(1).equalsIgnoreCase(BUILTIN_NAMESPACE)
+
+  /**
+   * Extract `system.builtin` / `system.session` entries from a resolved PATH, mapped to
+   * [[SessionCatalog.SessionFunctionKind]] in path order. Pure data conversion -- callers
+   * decide whether and how to use this list.
+   */
+  def systemFunctionKindsFromPath(
+      path: Seq[Seq[String]]): Seq[SessionCatalog.SessionFunctionKind] =
+    path.flatMap { e =>
+      if (isSystemBuiltinPathEntry(e)) Some(SessionCatalog.Builtin)
+      else if (isSystemSessionPathEntry(e)) Some(SessionCatalog.Temp)
+      else None
+    }
 
   /**
    * A single entry in the session SQL path: either a literal schema
@@ -312,4 +665,66 @@ private[sql] object CatalogManager {
     compact(JArray(entries.map(parts =>
       JArray(parts.map(JString(_)).toList)).toList))
   }
+
+  private def parsePathEntries(storedPathStr: String): Either[String, Seq[Seq[String]]] = {
+    import org.json4s.JsonAST.{JArray, JString}
+    import org.json4s.jackson.JsonMethods.parse
+
+    Try(parse(storedPathStr)).toOption match {
+      case Some(JArray(entries)) =>
+        entries.foldLeft(Right(Seq.empty[Seq[String]]): Either[String, Seq[Seq[String]]]) {
+          (acc, entry) =>
+            acc.flatMap { collected =>
+              entry match {
+                case JArray(parts) =>
+                  val strings = parts.collect { case JString(s) => s }
+                  if (strings.size == parts.size) Right(collected :+ strings)
+                  else Left("expected all array entry parts to be JSON strings")
+                case _ =>
+                  Left("expected each top-level array entry to be a JSON array")
+              }
+            }
+        }
+      case Some(_) =>
+        Left("expected top-level JSON array")
+      case None =>
+        Left("failed to parse JSON payload")
+    }
+  }
+
+  /**
+   * Parse a stored frozen path string from view/function metadata.
+   * Returns None if the payload is malformed.
+   */
+  def deserializePathEntries(storedPathStr: String): Option[Seq[Seq[String]]] = {
+    parsePathEntries(storedPathStr) match {
+      case Right(entries) => Some(entries)
+      case Left(reason) =>
+        logWarning(
+          s"Invalid stored SQL path metadata: $reason. Raw payload: $storedPathStr")
+        None
+    }
+  }
+
+  /**
+   * Parse stored frozen path metadata and fail analysis if malformed.
+   */
+  def deserializePathEntriesOrFail(
+      storedPathStr: String,
+      objectType: String,
+      objectName: String): Seq[Seq[String]] = {
+    parsePathEntries(storedPathStr) match {
+      case Right(entries) => entries
+      case Left(reason) =>
+        throw new AnalysisException(
+          message = s"Invalid stored SQL path metadata for $objectType '$objectName': " +
+            s"$reason. Raw payload: $storedPathStr",
+          line = None,
+          startPosition = None,
+          cause = None,
+          errorClass = None,
+          messageParameters = Map.empty,
+          context = Array.empty)
+    }
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
index cf6052009c927..a5f1ca7f1d289 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
@@ -171,6 +171,15 @@ private[sql] object CatalogV2Implicits {
         throw QueryCompilationErrors.requiresSinglePartNamespaceError(asMultipartIdentifier)
     }
 
+    // Build a v1 TableIdentifier for display / error-rendering purposes. Collapses a
+    // multi-part namespace to its last segment (v1 TableIdentifier has a single-string
+    // database field). Callers that need a lossless multi-part form should build a
+    // Seq[String] from toQualifiedNameParts instead.
+    def asLegacyTableIdentifier(catalogName: String): TableIdentifier = TableIdentifier(
+      table = ident.name(),
+      database = ident.namespace().lastOption,
+      catalog = Some(catalogName))
+
     /**
      * Tries to convert catalog identifier to the table identifier. Table identifier does not
      * support multiple namespaces (nested namespaces), so if identifier contains nested namespace,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
index b29d0b3eabe56..e42d5f3a84457 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
@@ -533,6 +533,27 @@ private[sql] object CatalogV2Util {
     catalog.name().equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)
   }
 
+  /**
+   * Construct a [[ViewInfo.Builder]] seeded from an existing view's metadata. Used by ALTER
+   * VIEW execs (SET / UNSET TBLPROPERTIES, ALTER VIEW ... WITH SCHEMA BINDING) -- override
+   * the one field that changes, then `build` to produce the replacement payload for
+   * [[ViewCatalog#replaceView]]. Every other field flows through unchanged so a metadata-only
+   * mutation does not perturb the view body.
+   */
+  def viewInfoBuilderFrom(existing: ViewInfo): ViewInfo.Builder = {
+    val builder = new ViewInfo.Builder()
+    builder
+      .withSchema(existing.schema)
+      .withProperties(existing.properties)
+      .withQueryText(existing.queryText)
+      .withSqlConfigs(existing.sqlConfigs)
+      .withCurrentNamespace(existing.currentNamespace)
+      .withQueryColumnNames(existing.queryColumnNames)
+    Option(existing.currentCatalog).foreach(builder.withCurrentCatalog)
+    Option(existing.schemaMode).foreach(builder.withSchemaMode)
+    builder
+  }
+
   def convertTableProperties(t: TableSpec): Map[String, String] = {
     val props = convertTableProperties(
       t.properties, t.options, t.serde, t.location, t.comment,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
index e6c70fdabb159..c40d5ab679190 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
@@ -64,6 +64,7 @@ private[sql] object Catalogs {
       }
       val plugin = pluginClass.getDeclaredConstructor().newInstance().asInstanceOf[CatalogPlugin]
       plugin.initialize(name, catalogOptions(name, conf))
+      validateTableViewCatalog(name, plugin)
       plugin
     } catch {
       case e: ClassNotFoundException =>
@@ -106,4 +107,22 @@ private[sql] object Catalogs {
     }
     new CaseInsensitiveStringMap(options)
   }
+
+  /**
+   * Reject catalogs that implement both [[TableCatalog]] and [[ViewCatalog]] without
+   * extending [[TableViewCatalog]]. The combined case has cross-cutting rules (single namespace,
+   * cross-type collision rejection, perf opt-ins) that live on [[TableViewCatalog]]; implementing
+   * the two interfaces directly would skip that contract.
+   */
+  private def validateTableViewCatalog(name: String, plugin: CatalogPlugin): Unit = {
+    if (plugin.isInstanceOf[TableCatalog] && plugin.isInstanceOf[ViewCatalog] &&
+        !plugin.isInstanceOf[TableViewCatalog]) {
+      throw new IllegalArgumentException(
+        s"Catalog '$name' (${plugin.getClass.getName}) implements both TableCatalog and " +
+          s"ViewCatalog directly. Catalogs that expose both tables and views must implement " +
+          s"TableViewCatalog instead, which centralizes the cross-cutting rules (shared " +
+          s"identifier namespace, cross-type collision rejection, single-RPC perf entry " +
+          s"points).")
+    }
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
index 203cfc23452a8..14c0663730324 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.connector.catalog
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedIdentifier, UnresolvedRelation, V2TableReference}
+import org.apache.spark.sql.catalyst.plans.logical.TransactionalWrite
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 
@@ -163,4 +165,22 @@ private[sql] trait LookupCatalog extends Logging {
       }
     }
   }
+
+  object TransactionalWrite {
+    def unapply(write: TransactionalWrite): Option[TransactionalCatalogPlugin] = {
+      EliminateSubqueryAliases(write.table) match {
+        case UnresolvedRelation(CatalogAndIdentifier(c: TransactionalCatalogPlugin, _), _, _) =>
+          Some(c)
+        case UnresolvedIdentifier(CatalogAndIdentifier(c: TransactionalCatalogPlugin, _), _) =>
+          Some(c)
+        case ref: V2TableReference =>
+          ref.catalog match {
+            case c: TransactionalCatalogPlugin => Some(c)
+            case _ => None
+          }
+        case _ =>
+          None
+      }
+    }
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/PathElement.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/PathElement.scala
new file mode 100644
index 0000000000000..ee9959762da9e
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/PathElement.scala
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connector.catalog.CatalogManager.{
+  CurrentSchemaEntry, LiteralPathEntry, SessionPathEntry
+}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * One element on the right-hand side of `SET PATH = ...`: either a well-known shortcut
+ * keyword (DEFAULT_PATH, SYSTEM_PATH, PATH, CURRENT_SCHEMA / CURRENT_DATABASE) or a
+ * fully qualified schema reference (`catalog.namespace...` with at least 2 parts).
+ *
+ * The same grammar is reused to parse the
+ * [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] conf value, so this
+ * AST node lives in catalyst beside [[CatalogManager]] rather than in the runtime
+ * [[org.apache.spark.sql.execution.command.SetPathCommand]].
+ */
+private[sql] sealed trait PathElement
+
+private[sql] object PathElement {
+  case object DefaultPath extends PathElement
+  case object SystemPath extends PathElement
+  case object PathRef extends PathElement
+
+  /**
+   * Current database/schema (SQL aliases). Stored as the
+   * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]]
+   * marker so resolution candidates expand against the live `USE SCHEMA`.
+   */
+  case object CurrentSchema extends PathElement
+
+  /** Fully qualified schema reference (`catalog.namespace...`). Must have at least 2 parts. */
+  case class SchemaInPath(parts: Seq[String]) extends PathElement
+
+  /**
+   * Expand a parsed [[PathElement]] list into concrete [[SessionPathEntry]] entries
+   * suitable for storing in [[CatalogManager._sessionPath]] or returning from
+   * [[CatalogManager#sessionPathEntries]].
+   *
+   * @param isConfDefaultExpansion when true, an inner [[DefaultPath]] token resolves
+   *                               to the spark-builtin default ordering (cycle break)
+   *                               rather than reading
+   *                               [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] again.
+   *                               Set to true when this method is invoked while
+   *                               parsing [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]]
+   *                               itself.
+   */
+  def expand(
+      elements: Seq[PathElement],
+      conf: SQLConf,
+      catalogManager: CatalogManager,
+      isConfDefaultExpansion: Boolean = false): Seq[SessionPathEntry] = {
+    val currentSchemaSentinel = Seq("__current_schema__")
+
+    def toEntries(parts: Seq[Seq[String]]): Seq[SessionPathEntry] = parts.map {
+      case p if p == currentSchemaSentinel => CurrentSchemaEntry
+      case p => LiteralPathEntry(p)
+    }
+
+    def builtinDefaultWithCurrentSchema: Seq[SessionPathEntry] =
+      toEntries(conf.defaultPathOrder(Seq(currentSchemaSentinel)))
+
+    def defaultPathExpansion: Seq[SessionPathEntry] = {
+      if (isConfDefaultExpansion) {
+        // Cycle break: inner DEFAULT_PATH inside the conf default value falls back to the
+        // spark-builtin default ordering instead of recursing.
+        builtinDefaultWithCurrentSchema
+      } else {
+        catalogManager.confDefaultPathEntries.getOrElse(builtinDefaultWithCurrentSchema)
+      }
+    }
+
+    elements.flatMap {
+      case DefaultPath =>
+        defaultPathExpansion
+      case SystemPath =>
+        toEntries(conf.systemPathOrder)
+      case CurrentSchema =>
+        Seq(CurrentSchemaEntry)
+      case PathRef =>
+        catalogManager.storedSessionPathEntries.getOrElse(defaultPathExpansion)
+      case SchemaInPath(parts) =>
+        Seq(LiteralPathEntry(parts))
+    }
+  }
+
+  /**
+   * Reject *static* duplicates in a SET PATH entry list: identical
+   * [[CatalogManager#LiteralPathEntry]] parts and repeated
+   * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]] markers
+   * (the `current_schema` / `current_database`
+   * cross-alias case). Used for the interactive `SET PATH` form to surface user typos at
+   * statement time.
+   *
+   * Deliberately does NOT compare a [[CatalogManager#LiteralPathEntry]] against a
+   * [[org.apache.spark.sql.connector.catalog.CatalogManager.CurrentSchemaEntry$]]: such a
+   * "duplicate" depends on the live `USE SCHEMA`
+   * and is harmless at lookup (first-match resolution skips the dead literal).
+   * [[org.apache.spark.sql.internal.SQLConf#DEFAULT_PATH()]] expansion skips this check
+   * entirely so transient `USE`-induced
+   * collisions don't wedge unqualified resolution.
+   */
+  def validateNoStaticDuplicates(
+      entries: Seq[SessionPathEntry],
+      caseSensitive: Boolean): Seq[SessionPathEntry] = {
+    val seenLiterals = new mutable.HashSet[Seq[String]]
+    var seenCurrentSchema = false
+    entries.foreach {
+      case CurrentSchemaEntry =>
+        if (seenCurrentSchema) {
+          throw new AnalysisException(
+            errorClass = "DUPLICATE_SQL_PATH_ENTRY",
+            messageParameters = Map("pathEntry" -> "current_schema"))
+        }
+        seenCurrentSchema = true
+      case LiteralPathEntry(parts) =>
+        val key = if (caseSensitive) parts else parts.map(_.toLowerCase(Locale.ROOT))
+        if (!seenLiterals.add(key)) {
+          throw new AnalysisException(
+            errorClass = "DUPLICATE_SQL_PATH_ENTRY",
+            messageParameters = Map(
+              "pathEntry" ->
+                parts.map(p => if (p.contains(".")) s"`$p`" else p).mkString(".")))
+        }
+    }
+    entries
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/TransactionAwareCatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/TransactionAwareCatalogManager.scala
new file mode 100644
index 0000000000000..e7a0cc73a3503
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/TransactionAwareCatalogManager.scala
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, TempVariableManager}
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog.SessionFunctionKind
+import org.apache.spark.sql.connector.catalog.CatalogManager.SessionPathEntry
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
+
+/**
+ * A [[CatalogManager]] decorator that redirects catalog lookups to the transaction's catalog
+ * instance when names match, ensuring table loads during analysis are scoped to the transaction.
+ * All mutable session state is delegated to the wrapped [[CatalogManager]].
+ */
+private[sql] class TransactionAwareCatalogManager(
+    delegate: CatalogManager,
+    txn: Transaction) extends CatalogManager {
+
+  // ---- Underlying state: pure delegation. ----
+  override def defaultSessionCatalog: CatalogPlugin = delegate.defaultSessionCatalog
+  override def v1SessionCatalog: SessionCatalog = delegate.v1SessionCatalog
+  override def tempVariableManager: TempVariableManager = delegate.tempVariableManager
+
+  // ---- Catalog access: redirect to txn catalog when names match. ----
+  override def catalog(name: String): CatalogPlugin = {
+    val resolved = delegate.catalog(name)
+    if (txn.catalog.name() == resolved.name()) txn.catalog else resolved
+  }
+
+  override private[sql] def v2SessionCatalog: CatalogPlugin = delegate.v2SessionCatalog
+
+  override def listCatalogs(pattern: Option[String]): Seq[String] =
+    delegate.listCatalogs(pattern)
+
+  override def transaction: Option[Transaction] = Some(txn)
+
+  override def withTransaction(newTxn: Transaction): CatalogManager =
+    throw SparkException.internalError("Cannot nest transactions: a transaction is already active.")
+
+  override def currentCatalog: CatalogPlugin = {
+    val c = delegate.currentCatalog
+    if (txn.catalog.name() == c.name()) txn.catalog else c
+  }
+
+  override def setCurrentCatalog(catalogName: String): Unit =
+    delegate.setCurrentCatalog(catalogName)
+
+  override def currentNamespace: Array[String] = delegate.currentNamespace
+
+  override def setCurrentNamespace(namespace: Array[String]): Unit =
+    delegate.setCurrentNamespace(namespace)
+
+  override def sessionPathEntries: Option[Seq[SessionPathEntry]] =
+    delegate.sessionPathEntries
+
+  override def storedSessionPathEntries: Option[Seq[SessionPathEntry]] =
+    delegate.storedSessionPathEntries
+
+  override def confDefaultPathEntries: Option[Seq[SessionPathEntry]] =
+    delegate.confDefaultPathEntries
+
+  override def setSessionPath(entries: Seq[SessionPathEntry]): Unit =
+    delegate.setSessionPath(entries)
+
+  override def clearSessionPath(): Unit = delegate.clearSessionPath()
+
+  override private[sql] def copySessionPathFrom(other: CatalogManager): Unit =
+    delegate.copySessionPathFrom(other)
+
+  override def currentPathString: String = delegate.currentPathString
+
+  override def sqlResolutionPathEntries(
+      pathDefaultCatalog: String,
+      pathDefaultNamespace: Seq[String],
+      expandCatalog: String,
+      expandNamespace: Seq[String]): Seq[Seq[String]] =
+    delegate.sqlResolutionPathEntries(
+      pathDefaultCatalog, pathDefaultNamespace, expandCatalog, expandNamespace)
+
+  override def sqlResolutionPathEntries(
+      currentCatalog: String,
+      currentNamespace: Seq[String]): Seq[Seq[String]] =
+    delegate.sqlResolutionPathEntries(currentCatalog, currentNamespace)
+
+  override def isSystemSessionOnPath: Boolean = delegate.isSystemSessionOnPath
+
+  override def resolutionPathEntriesForAnalysis(
+      pinnedEntries: Option[Seq[Seq[String]]],
+      viewCatalogAndNamespace: Seq[String]): Seq[Seq[String]] =
+    delegate.resolutionPathEntriesForAnalysis(pinnedEntries, viewCatalogAndNamespace)
+
+  override def sessionFunctionKindsForUnqualifiedResolution(): Seq[SessionFunctionKind] =
+    delegate.sessionFunctionKindsForUnqualifiedResolution()
+
+  override private[sql] def reset(): Unit = delegate.reset()
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
index eee6ddf3e58fd..8a47cac8e7962 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
@@ -22,8 +22,8 @@ import java.util
 import scala.collection.mutable
 import scala.jdk.CollectionConverters._
 
-import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils}
-import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper
+import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties
 import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform}
 import org.apache.spark.sql.types.StructType
@@ -49,7 +49,6 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table {
   override lazy val schema: StructType = v1Table.schema
 
   override lazy val partitioning: Array[Transform] = {
-    import CatalogV2Implicits._
     val partitions = new mutable.ArrayBuffer[Transform]()
 
     v1Table.partitionColumnNames.foreach { col =>
@@ -106,9 +105,118 @@ private[sql] object V1Table {
       case CatalogTableType.EXTERNAL => Some(TableSummary.EXTERNAL_TABLE_TYPE)
       case CatalogTableType.MANAGED => Some(TableSummary.MANAGED_TABLE_TYPE)
       case CatalogTableType.VIEW => Some(TableSummary.VIEW_TABLE_TYPE)
+      case CatalogTableType.METRIC_VIEW => Some(TableSummary.METRIC_VIEW_TABLE_TYPE)
       case _ => None
     }
   }
+
+  def toCatalogTable(
+      catalog: CatalogPlugin,
+      ident: Identifier,
+      t: MetadataTable): CatalogTable = t.getTableInfo match {
+    case viewInfo: ViewInfo => toCatalogTable(catalog, ident, viewInfo)
+    case tableInfo => toCatalogTable(catalog, ident, tableInfo)
+  }
+
+  private def toCatalogTable(
+      catalog: CatalogPlugin,
+      ident: Identifier,
+      info: TableInfo): CatalogTable = {
+    val props = info.properties.asScala.toMap
+    // PROP_TABLE_TYPE is advisory on the v2 side: it may be absent or carry a value that has no
+    // v1 mapping (e.g. TableSummary.FOREIGN_TABLE_TYPE). v1 only has EXTERNAL/MANAGED, so
+    // anything other than the explicit MANAGED mapping falls back to EXTERNAL for the v1
+    // representation -- the same default v1 uses when the value is missing. VIEW is reached
+    // only through the ViewInfo branch above.
+    val tableType = props.get(TableCatalog.PROP_TABLE_TYPE) match {
+      case Some(TableSummary.MANAGED_TABLE_TYPE) => CatalogTableType.MANAGED
+      case _ => CatalogTableType.EXTERNAL
+    }
+    // Reserved keys are promoted to first-class CatalogTable fields; strip them from the
+    // user-visible properties map so they're not double-persisted or leaked into the serde bag.
+    val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES
+    val (serdeProps, tableProps) = userProps.toSeq
+      .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX))
+    val tablePropsMap = tableProps.toMap
+    val (partCols, bucketSpec, clusterBySpec) = info.partitions.toSeq.convertTransforms
+    CatalogTable(
+      // `asLegacyTableIdentifier` collapses multi-part namespaces to their last segment (v1
+      // limitation). We record the full multi-part form in `multipartIdentifier` below;
+      // callers needing the real fully-qualified name should read `CatalogTable.fullIdent`.
+      identifier = ident.asLegacyTableIdentifier(catalog.name()),
+      tableType = tableType,
+      storage = CatalogStorageFormat.empty.copy(
+        locationUri = props.get(TableCatalog.PROP_LOCATION).map(CatalogUtils.stringToURI),
+        // v2 table properties should be put into the serde properties as well in case
+        // they contain data source options.
+        properties = tablePropsMap ++ serdeProps.map {
+          case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v
+        }
+      ),
+      schema = CatalogV2Util.v2ColumnsToStructType(info.columns),
+      provider = props.get(TableCatalog.PROP_PROVIDER),
+      partitionColumnNames = partCols,
+      bucketSpec = bucketSpec,
+      owner = props.getOrElse(TableCatalog.PROP_OWNER, ""),
+      comment = props.get(TableCatalog.PROP_COMMENT),
+      collation = props.get(TableCatalog.PROP_COLLATION),
+      properties = tablePropsMap ++
+        clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation),
+      multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier)
+    )
+  }
+
+  def toCatalogTable(
+      catalog: CatalogPlugin,
+      ident: Identifier,
+      info: ViewInfo): CatalogTable = {
+    val props = info.properties.asScala.toMap
+    val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES
+    // Serde/OPTION properties only apply to data-source tables; views' user properties are a
+    // plain TBLPROPERTIES bag.
+    val tablePropsMap = userProps
+    val viewContextProps = if (info.currentCatalog != null && info.currentCatalog.nonEmpty) {
+      CatalogTable.catalogAndNamespaceToProps(
+        info.currentCatalog, info.currentNamespace.toSeq)
+    } else {
+      Map.empty[String, String]
+    }
+    val sqlConfigProps = info.sqlConfigs.asScala.map {
+      case (k, v) => s"${CatalogTable.VIEW_SQL_CONFIG_PREFIX}$k" -> v
+    }.toMap
+    val queryOutputProps = if (info.queryColumnNames.isEmpty) {
+      Map.empty[String, String]
+    } else {
+      val numCols = info.queryColumnNames.length
+      val perColProps = info.queryColumnNames.zipWithIndex.map { case (name, idx) =>
+        s"${CatalogTable.VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX}$idx" -> name
+      }.toMap
+      perColProps + (CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> numCols.toString)
+    }
+    val schemaModeProps = Option(info.schemaMode)
+      .map(m => Map(CatalogTable.VIEW_SCHEMA_MODE -> m))
+      .getOrElse(Map.empty)
+    // ViewInfo always represents a view-like table, but PROP_TABLE_TYPE may further refine the
+    // kind (e.g. METRIC_VIEW). Default to plain VIEW when no refinement is supplied.
+    val tableType = props.get(TableCatalog.PROP_TABLE_TYPE) match {
+      case Some(TableSummary.METRIC_VIEW_TABLE_TYPE) => CatalogTableType.METRIC_VIEW
+      case _ => CatalogTableType.VIEW
+    }
+    CatalogTable(
+      identifier = ident.asLegacyTableIdentifier(catalog.name()),
+      tableType = tableType,
+      storage = CatalogStorageFormat.empty,
+      schema = CatalogV2Util.v2ColumnsToStructType(info.columns),
+      owner = props.getOrElse(TableCatalog.PROP_OWNER, ""),
+      viewText = Some(info.queryText),
+      viewOriginalText = Some(info.queryText),
+      comment = props.get(TableCatalog.PROP_COMMENT),
+      collation = props.get(TableCatalog.PROP_COLLATION),
+      properties = tablePropsMap ++ viewContextProps ++ sqlConfigProps ++
+        queryOutputProps ++ schemaModeProps,
+      multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier)
+    )
+  }
 }
 
 /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1ViewInfo.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1ViewInfo.scala
new file mode 100644
index 0000000000000..e18fe52385a1c
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1ViewInfo.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connector.catalog
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+
+/**
+ * A v1 [[CatalogTable]] (representing a session-catalog view) exposed through the v2
+ * [[ViewInfo]] surface, mirroring the way [[V1Table]] exposes a v1 table CatalogTable through
+ * the v2 [[Table]] surface. Holds the original [[CatalogTable]] in [[v1Table]] for v1-only
+ * paths that need the full v1 metadata representation (e.g. `DescribeTableCommand`,
+ * `ShowCreateTableCommand`, anything that calls `CatalogTable#toLinkedHashMap`).
+ *
+ * Note on `properties()`: the inherited [[ViewInfo#properties]] bag is built from the entire
+ * `v1Table.properties` map, which intermixes user TBLPROPERTIES with v1-internal storage keys
+ * (`view.sqlConfig.*`, `view.catalogAndNamespace.*`, `view.query.out.*`, `view.schemaMode`).
+ * v2 view inspection / SET execs (`ShowV2ViewPropertiesExec`, `AlterV2ViewSetPropertiesExec`,
+ * etc.) never see a `V1ViewInfo` -- `ResolveSessionCatalog` rewrites session-catalog views to
+ * v1 commands first -- so the bag stays internal to v1-only paths. Consumers that do receive
+ * a `V1ViewInfo` should prefer the typed accessors ([[ViewInfo#sqlConfigs]],
+ * [[ViewInfo#currentNamespace]], [[ViewInfo#currentCatalog]], [[ViewInfo#queryColumnNames]],
+ * [[ViewInfo#schemaMode]]) for the v1-internal fields rather than scraping `properties()` for
+ * them.
+ */
+private[sql] class V1ViewInfo(val v1Table: CatalogTable)
+    extends ViewInfo(V1ViewInfo.builderFrom(v1Table))
+
+private[sql] object V1ViewInfo {
+  /**
+   * Convert a v1 [[CatalogTable]] view into a [[ViewInfo.Builder]] with the same fields.
+   * Used as the {@code super(builder)} argument when constructing a [[V1ViewInfo]].
+   */
+  private def builderFrom(v1Table: CatalogTable): ViewInfo.Builder = {
+    val builder = new ViewInfo.Builder()
+    builder.withSchema(v1Table.schema)
+    builder.withProperties(v1Table.properties.asJava)
+    // v1 stores collation / comment in typed `CatalogTable` fields rather than in `properties`,
+    // but consumers reading off [[ViewInfo]] (`ApplyDefaultCollation.fetchDefaultCollation`,
+    // `ShowCreateV2ViewExec`, etc.) expect them under `PROP_COLLATION` / `PROP_COMMENT`. Bridge
+    // them through the typed setters so the v2 surface sees the same view metadata regardless
+    // of which catalog produced it.
+    v1Table.collation.foreach(builder.withCollation)
+    v1Table.comment.foreach(builder.withComment)
+    // ViewInfo requires a non-null queryText; v1 views always have one, but defend against
+    // an old/corrupt CatalogTable with `viewText = None` by falling back to an empty string.
+    builder.withQueryText(v1Table.viewText.getOrElse(""))
+    val cn = v1Table.viewCatalogAndNamespace
+    if (cn.nonEmpty) {
+      builder.withCurrentCatalog(cn.head)
+      builder.withCurrentNamespace(cn.tail.toArray)
+    }
+    builder.withSqlConfigs(v1Table.viewSQLConfigs.asJava)
+    Option(v1Table.viewSchemaMode).foreach(m => builder.withSchemaMode(m.toString))
+    builder.withQueryColumnNames(v1Table.viewQueryColumnNames.toArray)
+    builder
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
index c7f7b17a58430..348d7e96e7d46 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V2TableUtil.scala
@@ -19,10 +19,13 @@ package org.apache.spark.sql.connector.catalog
 
 import java.util.Locale
 
+import scala.collection.mutable
+
 import org.apache.spark.sql.catalyst.SQLConfHelper
 import org.apache.spark.sql.catalyst.analysis.Resolver
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, MetadataColumnHelper}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
+import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
 import org.apache.spark.sql.util.SchemaUtils
 import org.apache.spark.sql.util.SchemaValidationMode
@@ -121,6 +124,66 @@ private[sql] object V2TableUtil extends SQLConfHelper {
     SchemaUtils.validateSchemaCompatibility(originMetaSchema, metaSchema, resolver, mode)
   }
 
+  /**
+   * Validates that column IDs have not changed for columns that still exist in the table.
+   *
+   * Only validates columns where the original and current column both have non-null IDs.
+   * If the connector does not support column IDs (returns null), the validation is skipped.
+   *
+   * @param table the current table metadata
+   * @param relation the relation with captured columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateColumnIds(
+      table: Table,
+      relation: DataSourceV2Relation): Seq[String] = {
+    validateColumnIds(
+      table = table,
+      originalCapturedCols = relation.table.columns.toImmutableArraySeq)
+  }
+
+  /**
+   * Validates that column IDs have not changed for columns that still exist in the table.
+   *
+   * Only validates columns where the original and current column both have non-null IDs.
+   * If the connector does not support column IDs (returns null), the validation is skipped.
+   *
+   * ID transition handling:
+   *  - null to null: skipped (no ID to validate)
+   *  - null to ID: skipped (connector enabled ID tracking after analysis)
+   *  - ID to null: skipped (connector disabled ID tracking)
+   *  - ID to ID (same): no error
+   *  - ID to ID (different): error, same column name was replaced
+   *
+   * @param table the current table metadata
+   * @param originalCapturedCols the originally captured columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateColumnIds(
+      table: Table,
+      originalCapturedCols: Seq[Column]): Seq[String] = {
+    val currentColsByNormalizedName = table.columns.toImmutableArraySeq
+      .map(currentCol => normalize(currentCol.name()) -> currentCol).toMap
+    val errors = new mutable.ArrayBuffer[String]()
+    for (originalCol <- originalCapturedCols) {
+      if (originalCol.id() != null) {
+        currentColsByNormalizedName.get(normalize(originalCol.name())) match {
+          case Some(currentCol)
+            if currentCol.id() != null && currentCol.id() != originalCol.id() =>
+            errors += s"`${originalCol.name()}` column ID has changed from " +
+              s"${originalCol.id()} to ${currentCol.id()}"
+          case _ =>
+            // 1. Column exists in the original schema but not in the current table.
+            // 2. Column IDs have not changed.
+            // 3. The current column's ID is null (connector disabled ID tracking).
+            // Note that dropped columns are handled separately by
+            // [[columnsMissingOrAddedAfterAnalysis]].
+        }
+      }
+    }
+    errors.toSeq
+  }
+
   private def filter(colNames: Seq[String], cols: Seq[MetadataColumn]): Seq[MetadataColumn] = {
     val normalizedColNames = colNames.map(normalize).toSet
     cols.filter(col => normalizedColNames.contains(normalize(col.name)))
@@ -131,6 +194,20 @@ private[sql] object V2TableUtil extends SQLConfHelper {
     case _ => Seq.empty
   }
 
+  /**
+   * Validates that the identity of a loaded table matches a previously captured table id.
+   * Throws if the table was dropped and recreated under the same name (which changes the id).
+   * No-op if the connector does not support table ids (capturedId is null).
+   */
+  def validateTableId(name: String, capturedId: String, currentTable: Table): Unit = {
+    if (capturedId != null && capturedId != currentTable.id) {
+      throw QueryCompilationErrors.tableIdChangedAfterAnalysis(
+        name,
+        capturedTableId = capturedId,
+        currentTableId = currentTable.id)
+    }
+  }
+
   private def normalize(name: String): String = {
     if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/transactions/TransactionInfoImpl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/transactions/TransactionInfoImpl.scala
new file mode 100644
index 0000000000000..4cb53da0a59e2
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/transactions/TransactionInfoImpl.scala
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog.transactions
+
+case class TransactionInfoImpl(id: String) extends TransactionInfo
diff --git a/udf/worker/proto/src/main/scala/org/apache/spark/udf/worker/WorkerSpecification.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/InsertSummaryImpl.scala
similarity index 76%
rename from udf/worker/proto/src/main/scala/org/apache/spark/udf/worker/WorkerSpecification.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/InsertSummaryImpl.scala
index e25b99b69990c..97c2e082c2573 100644
--- a/udf/worker/proto/src/main/scala/org/apache/spark/udf/worker/WorkerSpecification.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/InsertSummaryImpl.scala
@@ -14,14 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.udf.worker
 
-import org.apache.spark.annotation.Experimental
+package org.apache.spark.sql.connector.write
 
 /**
- * :: Experimental ::
- * Typed Scala wrapper around the protobuf [[UDFWorkerSpecification]].
+ * Implementation of [[InsertSummary]] that provides INSERT operation summary.
  */
-@Experimental
-class WorkerSpecification(val proto: UDFWorkerSpecification) {
+private[sql] case class InsertSummaryImpl(numInsertedRows: Long) extends InsertSummary {
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index bd1e876c9fbd6..d5a9cc723bc34 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -396,13 +396,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
     )
   }
 
-  def trimCollationNotEnabledError(): Throwable = {
-    new AnalysisException(
-      errorClass = "UNSUPPORTED_FEATURE.TRIM_COLLATION",
-      messageParameters = Map.empty
-    )
-  }
-
   def trailingCommaInSelectError(origin: Origin): Throwable = {
     new AnalysisException(
       errorClass = "TRAILING_COMMA_IN_SELECT",
@@ -567,11 +560,11 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
   }
 
   def viewDepthExceedsMaxResolutionDepthError(
-      identifier: TableIdentifier, maxNestedDepth: Int, t: TreeNode[_]): Throwable = {
+      viewNameParts: Seq[String], maxNestedDepth: Int, t: TreeNode[_]): Throwable = {
     new AnalysisException(
       errorClass = "VIEW_EXCEED_MAX_NESTED_DEPTH",
       messageParameters = Map(
-        "viewName" -> toSQLId(identifier.nameParts),
+        "viewName" -> toSQLId(viewNameParts),
         "maxNestedDepth" -> maxNestedDepth.toString),
       origin = t.origin)
   }
@@ -916,23 +909,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
       messageParameters = Map("dt" -> dt.toString))
   }
 
-  def unresolvedVariableError(name: Seq[String], searchPath: Seq[String]): Throwable = {
-    new AnalysisException(
-      errorClass = "UNRESOLVED_VARIABLE",
-      messageParameters = Map(
-        "variableName" -> toSQLId(name),
-        "searchPath" -> toSQLId(searchPath)))
-  }
-
   def unresolvedVariableError(
       name: Seq[String],
-      searchPath: Seq[String],
+      pathEntries: Seq[Seq[String]],
       origin: Origin): Throwable = {
     new AnalysisException(
       errorClass = "UNRESOLVED_VARIABLE",
       messageParameters = Map(
         "variableName" -> toSQLId(name),
-        "searchPath" -> toSQLId(searchPath)),
+        "searchPath" -> pathEntries.map(toSQLId).mkString("[", ", ", "]")),
       origin = origin)
   }
 
@@ -2242,7 +2227,17 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
         "currentTableId" -> currentTableId))
   }
 
-  def columnsChangedAfterAnalysis(
+  def columnIdMismatchAfterAnalysis(
+      tableName: String,
+      errors: Seq[String]): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "errors" -> errors.mkString("- ", "\n- ", "")))
+  }
+
+  def columnsMissingOrAddedAfterAnalysis(
       tableName: String,
       errors: Seq[String]): Throwable = {
     new AnalysisException(
@@ -2535,6 +2530,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
       messageParameters = Map("sourceName" -> sourceName))
   }
 
+  def invalidStreamingSinkNameError(sinkName: String): Throwable = {
+    new AnalysisException(
+      errorClass = "STREAMING_QUERY_EVOLUTION_ERROR.INVALID_SINK_NAME",
+      messageParameters = Map("sinkName" -> sinkName))
+  }
+
   def duplicateStreamingSourceNamesError(duplicateNames: Seq[String]): Throwable = {
     new AnalysisException(
       errorClass = "STREAMING_QUERY_EVOLUTION_ERROR.DUPLICATE_SOURCE_NAMES",
@@ -2816,6 +2817,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
       messageParameters = Map.empty)
   }
 
+  def invalidMetricViewYamlError(message: String, cause: Throwable): Throwable = {
+    new AnalysisException(
+      errorClass = "INVALID_METRIC_VIEW_YAML",
+      messageParameters = Map("message" -> message),
+      cause = Some(cause))
+  }
+
   def noSuchStructFieldInGivenFieldsError(
       fieldName: String, fields: Array[StructField]): Throwable = {
     new AnalysisException(
@@ -3313,6 +3321,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
       messageParameters = Map("tableName" -> toSQLId(table)))
   }
 
+  def showCreateTableNotSupportedOnMetricViewError(table: String): Throwable = {
+    new AnalysisException(
+      errorClass = "UNSUPPORTED_SHOW_CREATE_TABLE.ON_METRIC_VIEW",
+      messageParameters = Map("tableName" -> toSQLId(table)))
+  }
+
   def showCreateTableNotSupportTransactionalHiveTableError(table: CatalogTable): Throwable = {
     new AnalysisException(
       errorClass = "UNSUPPORTED_SHOW_CREATE_TABLE.ON_TRANSACTIONAL_HIVE_TABLE",
@@ -3353,25 +3367,25 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
   }
 
   def cannotCreateViewTooManyColumnsError(
-      viewIdent: TableIdentifier,
+      viewNameParts: Seq[String],
       expected: Seq[String],
       query: LogicalPlan): Throwable = {
     new AnalysisException(
       errorClass = "CREATE_VIEW_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS",
       messageParameters = Map(
-        "viewName" -> toSQLId(viewIdent.nameParts),
+        "viewName" -> toSQLId(viewNameParts),
         "viewColumns" -> expected.map(c => toSQLId(c)).mkString(", "),
         "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", ")))
   }
 
   def cannotCreateViewNotEnoughColumnsError(
-      viewIdent: TableIdentifier,
+      viewNameParts: Seq[String],
       expected: Seq[String],
       query: LogicalPlan): Throwable = {
     new AnalysisException(
       errorClass = "CREATE_VIEW_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS",
       messageParameters = Map(
-        "viewName" -> toSQLId(viewIdent.nameParts),
+        "viewName" -> toSQLId(viewNameParts),
         "viewColumns" -> expected.map(c => toSQLId(c)).mkString(", "),
         "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", ")))
   }
@@ -3383,12 +3397,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
   }
 
   def unsupportedCreateOrReplaceViewOnTableError(
-      name: TableIdentifier, replace: Boolean): Throwable = {
+      nameParts: Seq[String], replace: Boolean): Throwable = {
     if (replace) {
       new AnalysisException(
         errorClass = "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE",
         messageParameters = Map(
-          "tableName" -> toSQLId(name.nameParts),
+          "tableName" -> toSQLId(nameParts),
           "operation" -> "CREATE OR REPLACE VIEW"
         )
       )
@@ -3396,16 +3410,16 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
       new AnalysisException(
         errorClass = "TABLE_OR_VIEW_ALREADY_EXISTS",
         messageParameters = Map(
-          "relationName" -> toSQLId(name.nameParts)
+          "relationName" -> toSQLId(nameParts)
         )
       )
     }
   }
 
-  def viewAlreadyExistsError(name: TableIdentifier): Throwable = {
+  def viewAlreadyExistsError(nameParts: Seq[String]): Throwable = {
     new AnalysisException(
       errorClass = "TABLE_OR_VIEW_ALREADY_EXISTS",
-      messageParameters = Map("relationName" -> name.toString))
+      messageParameters = Map("relationName" -> toSQLId(nameParts)))
   }
 
   def createPersistedViewFromDatasetAPINotAllowedError(): Throwable = {
@@ -3415,57 +3429,63 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
   }
 
   def recursiveViewDetectedError(
-      viewIdent: TableIdentifier,
-      newPath: Seq[TableIdentifier]): Throwable = {
+      viewIdent: Seq[String],
+      newPath: Seq[Seq[String]]): Throwable = {
     new AnalysisException(
       errorClass = "RECURSIVE_VIEW",
       messageParameters = Map(
-        "viewIdent" -> toSQLId(viewIdent.nameParts),
-        "newPath" -> newPath.map(p => toSQLId(p.nameParts)).mkString(" -> ")))
+        "viewIdent" -> toSQLId(viewIdent),
+        "newPath" -> newPath.map(toSQLId).mkString(" -> ")))
+  }
+
+  def recursiveFileLookupNotSupportedForPartitionedDataSourceError(): Throwable = {
+    new AnalysisException(
+      errorClass = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE",
+      messageParameters = Map.empty)
   }
 
   def notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError(
-      name: TableIdentifier,
+      viewNameParts: Seq[String],
       attr: Attribute): Throwable = {
     new AnalysisException(
       errorClass = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS",
       messageParameters = Map(
-        "name" -> toSQLId(name.nameParts),
+        "name" -> toSQLId(viewNameParts),
         "attr" -> toSQLExpr(attr)))
   }
 
   def notAllowedToCreatePermanentViewByReferencingTempViewError(
-      name: TableIdentifier,
-      nameParts: String): Throwable = {
+      viewNameParts: Seq[String],
+      tempViewNameParts: String): Throwable = {
     new AnalysisException(
       errorClass = "INVALID_TEMP_OBJ_REFERENCE",
       messageParameters = Map(
         "obj" -> "VIEW",
-        "objName" -> toSQLId(name.nameParts),
+        "objName" -> toSQLId(viewNameParts),
         "tempObj" -> "VIEW",
-        "tempObjName" -> toSQLId(nameParts)))
+        "tempObjName" -> toSQLId(tempViewNameParts)))
   }
 
   def notAllowedToCreatePermanentViewByReferencingTempFuncError(
-      name: TableIdentifier,
+      viewNameParts: Seq[String],
       funcName: String): Throwable = {
      new AnalysisException(
       errorClass = "INVALID_TEMP_OBJ_REFERENCE",
       messageParameters = Map(
         "obj" -> "VIEW",
-        "objName" -> toSQLId(name.nameParts),
+        "objName" -> toSQLId(viewNameParts),
         "tempObj" -> "FUNCTION",
         "tempObjName" -> toSQLId(funcName)))
   }
 
   def notAllowedToCreatePermanentViewByReferencingTempVarError(
-      nameParts: Seq[String],
+      viewNameParts: Seq[String],
       varName: Seq[String]): Throwable = {
     new AnalysisException(
       errorClass = "INVALID_TEMP_OBJ_REFERENCE",
       messageParameters = Map(
         "obj" -> "VIEW",
-        "objName" -> toSQLId(nameParts),
+        "objName" -> toSQLId(viewNameParts),
         "tempObj" -> "VARIABLE",
         "tempObjName" -> toSQLId(varName)))
   }
@@ -3673,6 +3693,24 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
         "createMode" -> toDSOption(createMode)))
   }
 
+  def schemaEvolutionNotSupportedForCreateTableWriteError(): Throwable = {
+    new AnalysisException(
+      errorClass = "UNSUPPORTED_SCHEMA_EVOLUTION.CREATE_TABLE",
+      messageParameters = Map.empty)
+  }
+
+  def schemaEvolutionNotSupportedForReplaceTableWriteError(): Throwable = {
+    new AnalysisException(
+      errorClass = "UNSUPPORTED_SCHEMA_EVOLUTION.REPLACE_TABLE",
+      messageParameters = Map.empty)
+  }
+
+  def schemaEvolutionNotSupportedForV1TableWriteError(): Throwable = {
+    new AnalysisException(
+      errorClass = "UNSUPPORTED_SCHEMA_EVOLUTION.V1_TABLE",
+      messageParameters = Map.empty)
+  }
+
   def partitionByDoesNotAllowedWhenUsingInsertIntoError(tableName: String): Throwable = {
     new AnalysisException(
       errorClass = "PARTITION_BY_NOT_ALLOWED_WITH_INSERT_INTO",
@@ -3862,6 +3900,42 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
       messageParameters = Map("catalogName" -> catalogName))
   }
 
+  def cdcUpdateDetectionRequiresCarryOverRemoval(
+      changelogName: String): AnalysisException = {
+    new AnalysisException(
+      errorClass = "INVALID_CDC_OPTION.UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL",
+      messageParameters = Map("changelogName" -> changelogName))
+  }
+
+  def changelogMissingColumnError(
+      changelogName: String, columnName: String): AnalysisException = {
+    new AnalysisException(
+      errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN",
+      messageParameters = Map(
+        "changelogName" -> changelogName,
+        "columnName" -> columnName))
+  }
+
+  def changelogInvalidColumnTypeError(
+      changelogName: String,
+      columnName: String,
+      expectedType: String,
+      actualType: String): AnalysisException = {
+    new AnalysisException(
+      errorClass = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE",
+      messageParameters = Map(
+        "changelogName" -> changelogName,
+        "columnName" -> columnName,
+        "expectedType" -> expectedType,
+        "actualType" -> actualType))
+  }
+
+  def changelogMissingRowIdError(changelogName: String): AnalysisException = {
+    new AnalysisException(
+      errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID",
+      messageParameters = Map("changelogName" -> changelogName))
+  }
+
   def invalidCdcOptionConflictingRangeTypes(): Throwable = {
     new AnalysisException(
       errorClass = "INVALID_CDC_OPTION.CONFLICTING_RANGE_TYPES",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index a1f19e0d9b90a..dc07c2a595619 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -666,6 +666,14 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       summary = "")
   }
 
+  def stInvalidArgumentErrorInvalidEndiannessValue(
+      endianness: String): SparkIllegalArgumentException = {
+    new SparkIllegalArgumentException(
+      errorClass = "ST_INVALID_ENDIANNESS_VALUE",
+      messageParameters = Map("endianness" -> endianness)
+    )
+  }
+
   def stInvalidSridValueError(srid: String): SparkIllegalArgumentException = {
     new SparkIllegalArgumentException(
       errorClass = "ST_INVALID_SRID_VALUE",
@@ -686,6 +694,17 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
     wkbParseError(msg, pos.toString)
   }
 
+  def cannotMutateReadOnlyGeoValueError(): SparkRuntimeException = {
+    // This guards an internal invariant: setSrid mutates the backing buffer in place, which only
+    // works when the value owns a tight on-heap array. It is never reachable from user input (the
+    // only caller copies first), so a misuse here is a Spark bug, hence INTERNAL_ERROR.
+    new SparkRuntimeException(
+      errorClass = "INTERNAL_ERROR",
+      messageParameters = Map("message" ->
+        ("setSrid requires a value that owns its backing buffer; call copy() before mutating a " +
+          "value read directly from an UnsafeRow / ColumnVector buffer.")))
+  }
+
   def withSuggestionIntervalArithmeticOverflowError(
       suggestedFunc: String,
       context: QueryContext): ArithmeticException = {
@@ -1194,6 +1213,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       new java.util.HashMap[String, String]())
   }
 
+  def cannotAcquireMemoryForWindowAggregateError(
+      requestedBytes: Long,
+      receivedBytes: Long): SparkOutOfMemoryError = {
+    new SparkOutOfMemoryError(
+      "UNABLE_TO_ACQUIRE_MEMORY",
+      java.util.Map.of(
+        "requestedBytes", requestedBytes.toString,
+        "receivedBytes", receivedBytes.toString))
+  }
+
   def rowLargerThan256MUnsupportedError(): SparkUnsupportedOperationException = {
     new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_2108")
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
index b59732f4820e6..df5ea031e79ff 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
@@ -473,7 +473,7 @@ private[arrow] class GeographyWriter(
   override def setValue(input: SpecializedGetters, ordinal: Int): Unit = {
     valueVector.setIndexDefined(count)
 
-    val geom = STUtils.deserializeGeog(input.getGeography(ordinal), dt)
+    val geom = STUtils.deserializeGeog(input.getBinaryView(ordinal), dt)
     val bytes = geom.getBytes
     val srid = geom.getSrid
 
@@ -491,7 +491,7 @@ private[arrow] class GeometryWriter(
   override def setValue(input: SpecializedGetters, ordinal: Int): Unit = {
     valueVector.setIndexDefined(count)
 
-    val geom = STUtils.deserializeGeom(input.getGeometry(ordinal), dt)
+    val geom = STUtils.deserializeGeom(input.getBinaryView(ordinal), dt)
     val bytes = geom.getBytes
     val srid = geom.getSrid
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala
index 8521df3db2ff0..ec45f1f373177 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala
@@ -19,9 +19,11 @@ package org.apache.spark.sql.execution.datasources.v2
 
 import java.util.{EnumSet => JEnumSet, Set => JSet}
 
-import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo, Column, SupportsRead, Table, TableCapability}
+import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogContext, Column, SupportsRead, Table, TableCapability}
 import org.apache.spark.sql.connector.catalog.TableCapability.{BATCH_READ, MICRO_BATCH_READ}
 import org.apache.spark.sql.connector.read.ScanBuilder
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.{DataType, LongType, StringType, TimestampType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 /**
@@ -33,7 +35,12 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
  */
 case class ChangelogTable(
     changelog: Changelog,
-    changelogInfo: ChangelogInfo) extends Table with SupportsRead {
+    changelogContext: ChangelogContext,
+    resolved: Boolean = false) extends Table with SupportsRead {
+
+  // Validate that the connector returned a schema with the required CDC metadata columns
+  // and correct types.
+  ChangelogTable.validateSchema(changelog)
 
   override def name: String = changelog.name
 
@@ -45,3 +52,52 @@ case class ChangelogTable(
 
   override def capabilities: JSet[TableCapability] = JEnumSet.of(BATCH_READ, MICRO_BATCH_READ)
 }
+
+object ChangelogTable {
+
+  private[v2] def validateSchema(cl: Changelog): Unit = {
+    val byName = cl.columns.map(c => c.name -> c).toMap
+    def check(name: String, expected: DataType*): Unit = {
+      val col = byName.getOrElse(name,
+        throw QueryCompilationErrors.changelogMissingColumnError(cl.name, name))
+      if (expected.nonEmpty && col.dataType != expected.head) {
+        throw QueryCompilationErrors.changelogInvalidColumnTypeError(
+          cl.name, name, expected.head.sql, col.dataType.sql)
+      }
+    }
+    check("_change_type", StringType)
+    // `_commit_version` must be either `LongType` or `StringType`. Connectors must
+    // additionally guarantee that the column's natural ordering (numeric /
+    // lexicographic) matches commit order, because the netChanges post-processing path
+    // sorts rows by this column. These two types cover every realistic CDC source;
+    // broader atomic types like `IntegerType` are strict subsets of `LongType`, and
+    // `TimestampType` duplicates the role of `_commit_timestamp`. The narrower
+    // contract can always be relaxed later (relaxing is non-breaking; restricting is
+    // not).
+    val versionCol = byName.getOrElse("_commit_version",
+      throw QueryCompilationErrors.changelogMissingColumnError(cl.name, "_commit_version"))
+    if (versionCol.dataType != LongType && versionCol.dataType != StringType) {
+      throw QueryCompilationErrors.changelogInvalidColumnTypeError(
+        cl.name, "_commit_version", "BIGINT or STRING", versionCol.dataType.sql)
+    }
+    check("_commit_timestamp", TimestampType)
+
+    // Only call `rowId()` / `rowVersion()` when a capability requires them; a connector
+    // that advertises a capability without overriding the method surfaces the default
+    // UnsupportedOperationException directly.
+    val needsRowId = cl.containsCarryoverRows() ||
+      cl.representsUpdateAsDeleteAndInsert() ||
+      cl.containsIntermediateChanges()
+    if (needsRowId) {
+      val rowIds = cl.rowId()
+      if (rowIds == null || rowIds.isEmpty) {
+        throw QueryCompilationErrors.changelogMissingRowIdError(cl.name)
+      }
+    }
+    val needsRowVersion = cl.containsCarryoverRows() ||
+      cl.representsUpdateAsDeleteAndInsert()
+    if (needsRowVersion) {
+      cl.rowVersion()
+    }
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 99a5339950a58..b69e0428630c6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -43,9 +43,11 @@ import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver}
 import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
 import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
+import org.apache.spark.sql.connector.catalog.PathElement.PathRef
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.types.{AtomicType, TimestampNTZType, TimestampType}
 import org.apache.spark.storage.{StorageLevel, StorageLevelMapper}
@@ -155,6 +157,13 @@ object SQLConf {
     override def initialValue: SQLConf = null
   }
 
+  /**
+   * Returns the [[SQLConf]] installed by an outer [[withExistingConf]] scope, or [[None]] if
+   * there is no such scope. Unlike [[get]], this peeks directly at the threadlocal so callers
+   * can distinguish "no outer scope" from "outer scope happens to install the same conf".
+   */
+  def getExistingConfIfSet: Option[SQLConf] = Option(existingConf.get())
+
   def withExistingConf[T](conf: SQLConf)(f: => T): T = {
     val old = existingConf.get()
     existingConf.set(conf)
@@ -623,7 +632,7 @@ object SQLConf {
       .doc("When true, enables geospatial types (GEOGRAPHY/GEOMETRY) and ST functions.")
       .version("4.1.0")
       .booleanConf
-      .createWithDefaultFunction(() => Utils.isTesting)
+      .createWithDefault(true)
 
   val TYPES_FRAMEWORK_ENABLED =
     buildConf("spark.sql.types.framework.enabled")
@@ -746,15 +755,15 @@ object SQLConf {
 
   val RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED =
     buildConf("spark.sql.optimizer.runtime.rowLevelOperationGroupFilter.enabled")
-      .doc("Enables runtime group filtering for group-based row-level operations. " +
-        "Data sources that replace groups of data (e.g. files, partitions) may prune entire " +
-        "groups using provided data source filters when planning a row-level operation scan. " +
-        "However, such filtering is limited as not all expressions can be converted into data " +
-        "source filters and some expressions can only be evaluated by Spark (e.g. subqueries). " +
-        "Since rewriting groups is expensive, Spark can execute a query at runtime to find what " +
-        "records match the condition of the row-level operation. The information about matching " +
-        "records will be passed back to the row-level operation scan, allowing data sources to " +
-        "discard groups that don't have to be rewritten.")
+      .doc("Enables runtime filtering for group-based and delta-based row-level operations. " +
+        "Data sources may prune entire file groups at runtime when planning a row-level " +
+        "operation scan. Planning-time filter pushdown is limited as not all expressions can " +
+        "be converted into data source filters and some expressions can only be evaluated by " +
+        "Spark (e.g. subqueries). Since rewriting groups or scanning unnecessary files is " +
+        "expensive, Spark can execute a lightweight query at runtime to find what records match " +
+        "the condition of the row-level operation. The information about matching records will " +
+        "be passed back to the row-level operation scan, allowing data sources to skip files " +
+        "that don't have to be processed.")
       .version("3.4.0")
       .booleanConf
       .createWithDefault(true)
@@ -1141,16 +1150,6 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
-  val TRIM_COLLATION_ENABLED =
-    buildConf("spark.sql.collation.trim.enabled")
-      .internal()
-      .doc("When enabled allows the use of trim collations which trim trailing whitespaces from" +
-        " strings."
-      )
-      .version("4.0.0")
-      .booleanConf
-      .createWithDefault(true)
-
   val COLLATION_AWARE_HASHING_ENABLED =
     buildConf("spark.sql.legacy.collationAwareHashFunctions")
       .internal()
@@ -1332,6 +1331,19 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val SUBEXPRESSION_ELIMINATION_FILTER_EXEC_ENABLED =
+    buildConf("spark.sql.subexpressionElimination.filterExec.enabled")
+      .internal()
+      .doc("When true (and subexpression elimination is enabled), FilterExec whole-stage " +
+        "codegen eliminates common subexpressions shared across its predicates. When false, " +
+        "FilterExec falls back to the predicate codegen that loads input columns lazily and " +
+        "short-circuits, avoiding eager materialization of all predicate-referenced columns on " +
+        "every row. Only affects FilterExec; subexpression elimination elsewhere is unaffected.")
+      .version("4.2.0")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .booleanConf
+      .createWithDefault(true)
+
   val CASE_SENSITIVE = buildConf(SqlApiConfHelper.CASE_SENSITIVE_KEY)
     .internal()
     .doc("Whether the query analyzer should be case sensitive or not. " +
@@ -2329,17 +2341,6 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
-  val VIEW_SCHEMA_EVOLUTION_PRESERVE_USER_COMMENTS =
-    buildConf("spark.sql.view.schemaEvolution.preserveUserComments")
-      .internal()
-      .doc("When enabled, views with SCHEMA EVOLUTION mode will preserve user-set view comments " +
-        "when the underlying table schema evolves. When disabled, view comments will be " +
-        "overwritten with table comments on every schema sync.")
-      .version("4.2.0")
-      .withBindingPolicy(ConfigBindingPolicy.SESSION)
-      .booleanConf
-      .createWithDefault(true)
-
   val OUTPUT_COMMITTER_CLASS = buildConf("spark.sql.sources.outputCommitterClass")
     .version("1.4.0")
     .internal()
@@ -2461,6 +2462,29 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val DEFAULT_PATH =
+    buildConf("spark.sql.defaultPath")
+      .version("4.2.0")
+      .doc("Default SQL PATH used when no SET PATH has been issued in the session; this is " +
+        "also the value to which `SET PATH = DEFAULT_PATH` expands. Accepts the full SET PATH " +
+        "grammar; an inner DEFAULT_PATH token resolves to the spark-builtin default ordering. " +
+        "The PATH keyword is not allowed in this conf value. " +
+        "When empty, the spark-builtin default ordering controlled by " +
+        "`spark.sql.functionResolution.sessionOrder` applies. Validated for syntax at set time; " +
+        "redundant entries are tolerated (lookup uses first-match resolution). The interactive " +
+        "SET PATH form still rejects static duplicates as a typo guard.")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .stringConf
+      .checkValue(
+        v =>
+          v == null || v.trim.isEmpty ||
+            Try(CatalystSqlParser.parsePathElements(v.trim))
+              .toOption
+              .exists(!_.contains(PathRef)),
+        "The value must be empty or a comma-separated SET PATH element list " +
+          "(same grammar as SET PATH, except PATH is not allowed).")
+      .createWithDefault("")
+
   // Whether to retain group by columns or not in GroupedData.agg.
   val DATAFRAME_RETAIN_GROUP_COLUMNS = buildConf("spark.sql.retainGroupColumns")
     .version("1.4.0")
@@ -2506,6 +2530,35 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val WHOLESTAGE_UNION_CODEGEN_ENABLED =
+    buildConf("spark.sql.codegen.wholeStage.union.enabled")
+      .internal()
+      .doc("When both this conf and `spark.sql.codegen.wholeStage` are true, " +
+        "UnionExec participates in whole-stage codegen on its " +
+        "non-partitioning-aware path: the parent and all children fuse into " +
+        "a single WholeStageCodegenExec stage.")
+      .version("4.2.0")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .booleanConf
+      .createWithDefault(false)
+
+  val WHOLESTAGE_UNION_MAX_CHILDREN =
+    buildConf("spark.sql.codegen.wholeStage.union.maxChildren")
+      .internal()
+      .doc("Maximum number of UnionExec children eligible for whole-stage " +
+        "codegen fusion. Each child is emitted as its own helper method, so " +
+        "this conf bounds class-level costs of the fused stage (total " +
+        "bytecode size, constant pool growth, JIT compilation time) rather " +
+        "than the JVM per-method bytecode limit. Unions with more children " +
+        "fall back to per-child codegen stages. Only effective when " +
+        s"`${WHOLESTAGE_UNION_CODEGEN_ENABLED.key}` is true.")
+      .version("4.2.0")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .intConf
+      .checkValue(v => v >= 2,
+        "The value of spark.sql.codegen.wholeStage.union.maxChildren must be >= 2")
+      .createWithDefault(64)
+
   val WHOLESTAGE_MAX_NUM_FIELDS = buildConf("spark.sql.codegen.maxFields")
     .internal()
     .doc("The maximum number of fields (including nested fields) that will be supported before" +
@@ -3143,6 +3196,16 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val ENABLE_STREAMING_SINK_EVOLUTION =
+    buildConf("spark.sql.streaming.queryEvolution.enableSinkEvolution")
+      .internal()
+      .doc("When true, streaming sinks can be named using the name() API on DataStreamWriter. " +
+        "This enables sink evolution capability where sinks can be changed while maintaining " +
+        "a historical record of all sinks used in the checkpoint.")
+      .version("4.1.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val STREAMING_CHECK_UNFINISHED_REPARTITION_ON_RESTART =
     buildConf("spark.sql.streaming.checkUnfinishedRepartitionOnRestart")
       .internal()
@@ -3558,6 +3621,19 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val LEGACY_ALLOW_UDF_PARAMETER_TO_SHADOW_PARAMETERLESS_FUNCTION =
+    buildConf("spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction")
+      .internal()
+      .doc("When true (legacy behavior), a SQL UDF parameter alias shadows a parameterless " +
+        "built-in function (current_user, current_date, current_time, current_timestamp, " +
+        "user, session_user, grouping__id) of the same name. When false (the default), the " +
+        "parameterless built-in function takes precedence, matching the documented name " +
+        "resolution rules.")
+      .version("4.2.0")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .booleanConf
+      .createWithDefault(false)
+
   val ALLOW_NON_EMPTY_LOCATION_IN_CTAS =
     buildConf("spark.sql.legacy.allowNonEmptyLocationInCTAS")
       .internal()
@@ -4086,6 +4162,51 @@ object SQLConf {
         "The threshold of window group limit must be -1, 0 or positive integer.")
       .createWithDefault(1000)
 
+  val WINDOW_SEGMENT_TREE_ENABLED =
+    buildConf("spark.sql.window.segmentTree.enabled")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .doc("Use block-chunked segment tree for moving aggregate window frames " +
+        "whose functions are all DeclarativeAggregate without FILTER/DISTINCT.")
+      .version("4.2.0")
+      .booleanConf
+      .createWithDefault(false)
+
+  val WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS =
+    buildConf("spark.sql.window.segmentTree.minPartitionRows")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .doc("Minimum partition row count to activate the segment-tree moving frame. " +
+        "Partitions smaller than this fall back to the default sliding implementation.")
+      .version("4.2.0")
+      .internal()
+      .intConf
+      .checkValue(_ > 0, "minPartitionRows must be > 0")
+      .createWithDefault(64)
+
+  val WINDOW_SEGMENT_TREE_BLOCK_SIZE =
+    buildConf("spark.sql.window.segmentTree.blockSize")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .doc("Block size, in rows, for the block-chunked segment tree used by moving " +
+        "window frames. Each leaf of the tree aggregates this many consecutive rows. " +
+        "Smaller values reduce per-partition memory and speed up tree build for small " +
+        "partitions, but make the tree deeper and increase query cost for wide frames. " +
+        "Larger values amortize build cost and shrink the tree but increase the per-block " +
+        "prefix/suffix scan cost within a block. The default is tuned for partitions on " +
+        "the order of tens of thousands to millions of rows.")
+      .version("4.2.0")
+      .intConf
+      .checkValue(_ >= 16, "blockSize must be >= 16")
+      .createWithDefault(65536)
+
+  val WINDOW_SEGMENT_TREE_FANOUT =
+    buildConf("spark.sql.window.segmentTree.fanout")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .doc("Fanout of internal nodes for the block-chunked segment tree.")
+      .version("4.2.0")
+      .internal()
+      .intConf
+      .checkValue(_ >= 2, "fanout must be >= 2")
+      .createWithDefault(16)
+
   val SESSION_WINDOW_BUFFER_IN_MEMORY_THRESHOLD =
     buildConf("spark.sql.sessionWindow.buffer.in.memory.threshold")
       .internal()
@@ -6050,6 +6171,19 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val VARIANT_VALIDATE_UNICODE_IN_JSON_PARSING =
+    buildConf("spark.sql.variant.validateUnicodeInJsonParsing")
+      .internal()
+      .doc("When true, parsing variant from JSON rejects strings that contain unpaired UTF-16 " +
+        "surrogate code units (such as a lone high surrogate like \\uD835), which are invalid " +
+        "per RFC 8259 section 7. When false, restores the legacy permissive behavior in which " +
+        "the unpaired surrogate is silently replaced by the Unicode replacement character " +
+        "during UTF-8 encoding, causing data corruption that diverges from strict JSON parsers.")
+      .version("4.3.0")
+      .withBindingPolicy(ConfigBindingPolicy.SESSION)
+      .booleanConf
+      .createWithDefault(true)
+
   val VARIANT_ALLOW_READING_SHREDDED =
     buildConf("spark.sql.variant.allowReadingShredded")
       .internal()
@@ -6593,15 +6727,29 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
-  val MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED =
-    buildConf("spark.sql.optimizer.mergeSubplans.symmetricFilterPropagation.enabled")
-      .doc("When set to true, two non-grouping aggregate subplans that both have filter " +
-        "conditions (but with different predicates) can be merged into a single scan using " +
-        "FILTER (WHERE ...) clauses on each aggregate expression. " +
-        "Merging two filtered scans broadens the combined filter to OR(f1, f2), which may " +
-        "reduce IO pruning (e.g. partition or file skipping) compared to the individual " +
-        "filters. Disabled by default; enable once the behaviour has been validated in your " +
-        "workload, particularly on heavily partitioned or file-pruned tables. " +
+  val MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED = buildConf(
+    "spark.sql.optimizer.mergeSubplans.filterPropagation.symmetricFilterPropagation.enabled")
+    .doc("When set to true, two non-grouping aggregate subplans that both have filter " +
+      "conditions (but with different predicates) can be merged into a single scan using " +
+      "FILTER (WHERE ...) clauses on each aggregate expression. " +
+      "Merging two filtered scans broadens the combined filter to OR(f1, f2), which may " +
+      "reduce IO pruning (e.g. partition or file skipping) compared to the individual " +
+      "filters. Disabled by default; enable once the behaviour has been validated in your " +
+      "workload, particularly on heavily partitioned or file-pruned tables. " +
+      s"Has no effect when ${MERGE_SUBPLANS_FILTER_PROPAGATION_ENABLED.key} is false.")
+    .version("4.2.0")
+    .withBindingPolicy(ConfigBindingPolicy.SESSION)
+    .booleanConf
+    .createWithDefault(false)
+
+  val MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED =
+    buildConf("spark.sql.optimizer.mergeSubplans.filterPropagation.throughJoin.enabled")
+      .doc("When set to true, filter attributes can propagate through Join nodes during subplan " +
+        "merging, allowing subplans that differ only in their filter conditions and share a " +
+        "common join to be merged into a single scan. A filter attribute is only propagated " +
+        "through a join when it originates from the non-nullable (preserved) side: the left side " +
+        "of LeftOuter/LeftSemi/LeftAnti, the right side of RightOuter, or either side of " +
+        "Inner/Cross. FullOuter joins are never eligible. " +
         s"Has no effect when ${MERGE_SUBPLANS_FILTER_PROPAGATION_ENABLED.key} is false.")
       .version("4.2.0")
       .withBindingPolicy(ConfigBindingPolicy.SESSION)
@@ -7203,6 +7351,18 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val INSERT_INTO_NESTED_TYPE_COERCION_ENABLED =
+    buildConf("spark.sql.insertNestedTypeCoercion.enabled")
+      .internal()
+      .doc("If enabled, allow INSERT INTO WITH SCHEMA EVOLUTION to fill missing nested " +
+        "struct fields with null when the source has fewer nested fields than the target " +
+        "table. Also relaxes by-position column-count enforcement so trailing missing " +
+        "top-level columns are filled with their default value (or null). This is " +
+        "experimental and the semantics may change.")
+      .version("4.2.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val TIME_TYPE_ENABLED =
     buildConf("spark.sql.timeType.enabled")
       .internal()
@@ -7613,8 +7773,6 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def schemaLevelCollationsEnabled: Boolean = getConf(SCHEMA_LEVEL_COLLATIONS_ENABLED)
 
-  def trimCollationEnabled: Boolean = getConf(TRIM_COLLATION_ENABLED)
-
   def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)
 
   def adaptiveExecutionEnabledInStatelessStreaming: Boolean =
@@ -7659,6 +7817,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def enableStreamingSourceEvolution: Boolean = getConf(ENABLE_STREAMING_SOURCE_EVOLUTION)
 
+  def enableStreamingSinkEvolution: Boolean = getConf(ENABLE_STREAMING_SINK_EVOLUTION)
+
   def streamingCheckUnfinishedRepartitionOnRestart: Boolean =
     getConf(STREAMING_CHECK_UNFINISHED_REPARTITION_ON_RESTART)
 
@@ -7849,6 +8009,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
   def subexpressionEliminationSkipForShotcutExpr: Boolean =
     getConf(SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR)
 
+  def subexpressionEliminationFilterExecEnabled: Boolean =
+    getConf(SUBEXPRESSION_ELIMINATION_FILTER_EXEC_ENABLED)
+
   def autoBroadcastJoinThreshold: Long = getConf(AUTO_BROADCASTJOIN_THRESHOLD)
 
   def limitInitialNumPartitions: Int = getConf(LIMIT_INITIAL_NUM_PARTITIONS)
@@ -7980,9 +8143,6 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def viewSchemaCompensation: Boolean = getConf(VIEW_SCHEMA_COMPENSATION)
 
-  def viewSchemaEvolutionPreserveUserComments: Boolean =
-    getConf(VIEW_SCHEMA_EVOLUTION_PRESERVE_USER_COMMENTS)
-
   def defaultCacheStorageLevel: StorageLevel =
     StorageLevel.fromString(getConf(DEFAULT_CACHE_STORAGE_LEVEL).name())
 
@@ -8040,6 +8200,14 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def windowGroupLimitThreshold: Int = getConf(WINDOW_GROUP_LIMIT_THRESHOLD)
 
+  def windowSegmentTreeEnabled: Boolean = getConf(WINDOW_SEGMENT_TREE_ENABLED)
+
+  def windowSegmentTreeMinPartitionRows: Int = getConf(WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS)
+
+  def windowSegmentTreeBlockSize: Int = getConf(WINDOW_SEGMENT_TREE_BLOCK_SIZE)
+
+  def windowSegmentTreeFanout: Int = getConf(WINDOW_SEGMENT_TREE_FANOUT)
+
   def sessionWindowBufferInMemoryThreshold: Int = getConf(SESSION_WINDOW_BUFFER_IN_MEMORY_THRESHOLD)
 
   def sessionWindowBufferSpillThreshold: Int = getConf(SESSION_WINDOW_BUFFER_SPILL_THRESHOLD)
@@ -8449,6 +8617,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def pathEnabled: Boolean = getConf(SQLConf.PATH_ENABLED)
 
+  def defaultPath: String = getConf(SQLConf.DEFAULT_PATH)
+
   /**
    * Returns the resolution search path for error messages and resolution order.
    * This is the single source of truth for the search path used for functions, tables, and views.
@@ -8460,8 +8630,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   /**
    * Orders the given catalog path entries by [[sessionFunctionResolutionOrder]], inserting
-   * system.session and system.builtin. Used by both the legacy single-schema resolution and
-   * by SET PATH's DEFAULT_PATH / SYSTEM_PATH expansion to keep ordering in sync.
+   * system.session and system.builtin. Used by the legacy single-schema resolution and by
+   * SET PATH's DEFAULT_PATH expansion (when `spark.sql.defaultPath` is empty) to keep
+   * ordering in sync. SYSTEM_PATH no longer flows through here -- see [[systemPathOrder]].
    *
    * @param catalogEntries persistent catalog path entries (may be empty).
    */
@@ -8482,8 +8653,13 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
     }
   }
 
-  /** System-only path (builtin + session) ordered by [[sessionFunctionResolutionOrder]]. */
-  def systemPathOrder: Seq[Seq[String]] = defaultPathOrder(Seq.empty)
+  /**
+   * System-only path used by `SET PATH = SYSTEM_PATH`. Contains the system-managed namespaces
+   * under the `system` catalog whose contents are wholly defined by Spark itself; today that
+   * is only `system.builtin`, but the shortcut is reserved for future system-managed schemas
+   * (e.g. AI, geospatial, ML).
+   */
+  def systemPathOrder: Seq[Seq[String]] = Seq(Seq("system", "builtin"))
 
   override def legacyParameterSubstitutionConstantsOnly: Boolean =
     getConf(SQLConf.LEGACY_PARAMETER_SUBSTITUTION_CONSTANTS_ONLY)
@@ -8527,6 +8703,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
   def coerceMergeNestedTypes: Boolean =
     getConf(SQLConf.MERGE_INTO_NESTED_TYPE_COERCION_ENABLED)
 
+  def coerceInsertNestedTypes: Boolean =
+    getConf(SQLConf.INSERT_INTO_NESTED_TYPE_COERCION_ENABLED)
+
   def isTimeTypeEnabled: Boolean = getConf(SQLConf.TIME_TYPE_ENABLED)
 
   def listaggAllowDistinctCastWithOrder: Boolean = getConf(LISTAGG_ALLOW_DISTINCT_CAST_WITH_ORDER)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ColumnImpl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ColumnImpl.scala
index 47889410561e3..f97f90b7eb590 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ColumnImpl.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ColumnImpl.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.internal.connector
 
+import java.util.Objects
+
 import org.apache.spark.sql.connector.catalog.{Column, ColumnDefaultValue, IdentityColumnSpec}
 import org.apache.spark.sql.types.DataType
 
@@ -29,4 +31,37 @@ case class ColumnImpl(
     defaultValue: ColumnDefaultValue,
     generationExpression: String,
     identityColumnSpec: IdentityColumnSpec,
-    metadataInJSON: String) extends Column
+    metadataInJSON: String,
+    override val id: String = null) extends Column {
+
+  // [[id]] is excluded from [[equals]] and [[hashCode]] because IDs only live on [[Column]],
+  // not on [[StructField]] metadata. Any code path that round-trips through [[StructType]]
+  // (e.g. [[CatalogV2Util.v2ColumnsToStructType]] followed by [[structTypeToV2Columns]])
+  // drops the ID, producing a [[Column]] with id=null for the same logical column. Including
+  // [[id]] in equality would cause spurious mismatches across these round-trips.
+  // Column ID validation is performed separately by [[V2TableUtil.validateColumnIds]].
+  override def equals(other: Any): Boolean = other match {
+    case that: ColumnImpl =>
+      name == that.name &&
+        dataType == that.dataType &&
+        nullable == that.nullable &&
+        comment == that.comment &&
+        defaultValue == that.defaultValue &&
+        generationExpression == that.generationExpression &&
+        identityColumnSpec == that.identityColumnSpec &&
+        metadataInJSON == that.metadataInJSON
+    case _ => false
+  }
+
+  override def hashCode(): Int = {
+    Objects.hash(
+      name,
+      dataType,
+      Boolean.box(nullable),
+      comment,
+      defaultValue,
+      generationExpression,
+      identityColumnSpec,
+      metadataInJSON)
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/logical/metricViewNodes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/logical/metricViewNodes.scala
index a7fa037a4b33f..76163794db33d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/logical/metricViewNodes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/logical/metricViewNodes.scala
@@ -19,14 +19,62 @@ package org.apache.spark.sql.metricview.logical
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet}
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Expression}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryCommand, UnaryNode}
 import org.apache.spark.sql.catalyst.trees.TreePattern.{METRIC_VIEW_PLACEHOLDER, RESOLVED_METRIC_VIEW, TreePattern}
-import org.apache.spark.sql.metricview.serde.MetricView
+import org.apache.spark.sql.types.Metadata
+
+/**
+ * A parsed metric-view column, populated by
+ * [[org.apache.spark.sql.metricview.util.MetricViewPlanner]] from the YAML definition before the
+ * placeholder is handed to the analyzer. Carrying the parsed [[Expression]] (rather than the raw
+ * YAML descriptor) lets downstream resolution rules read a stable, analyzer-friendly
+ * representation without re-parsing.
+ */
+sealed trait InputColumn {
+  def name: String
+  def expr: Expression
+  def metadata: Metadata
+}
+
+case class DimensionInputColumn(
+    name: String,
+    expr: Expression,
+    metadata: Metadata) extends InputColumn
+
+case class MeasureInputColumn(
+    name: String,
+    expr: Expression,
+    metadata: Metadata) extends InputColumn
+
+/**
+ * Logical plan for `CREATE VIEW ... WITH METRICS`. This is the v1/v2-agnostic representation
+ * the parser returns; downstream analysis decides which runnable form it becomes:
+ *  - For the session catalog: [[org.apache.spark.sql.execution.command.CreateMetricViewCommand]]
+ *    via an analyzer rule that fires once the identifier is resolved.
+ *  - For non-session v2 [[org.apache.spark.sql.connector.catalog.ViewCatalog]]s: a
+ *    `CreateV2MetricViewExec` produced by `DataSourceV2Strategy`.
+ *
+ * Splitting this from the runnable command lets the parser return a single logical shape
+ * regardless of target catalog (instead of pre-committing to a runnable command at parse
+ * time), and gives downstream rules a single match target to dispatch from.
+ */
+case class CreateMetricView(
+    child: LogicalPlan,
+    userSpecifiedColumns: Seq[(String, Option[String])],
+    comment: Option[String],
+    properties: Map[String, String],
+    originalText: String,
+    allowExisting: Boolean,
+    replace: Boolean) extends UnaryCommand {
+  override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
+    copy(child = newChild)
+  }
+}
 
 case class MetricViewPlaceholder(
     metadata: CatalogTable,
-    desc: MetricView,
+    inputColumns: Seq[InputColumn],
     outputMetrics: Seq[Attribute],
     child: LogicalPlan,
     isCreate: Boolean = false) extends UnaryNode {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/serde/MetricViewCanonical.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/serde/MetricViewCanonical.scala
index 2e76a13741d09..1b4718ebd385e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/serde/MetricViewCanonical.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/serde/MetricViewCanonical.scala
@@ -94,7 +94,7 @@ private[sql] object Source {
     if (sourceText.isEmpty) {
       throw MetricViewValidationException("Source cannot be empty")
     }
-    Try(CatalystSqlParser.parseTableIdentifier(sourceText)) match {
+    Try(CatalystSqlParser.parseMultipartIdentifier(sourceText)) match {
       case Success(_) => AssetSource(sourceText)
       case Failure(_) =>
         Try(CatalystSqlParser.parseQuery(sourceText)) match {
@@ -167,4 +167,42 @@ private[sql] case class MetricView(
     version: String,
     from: Source,
     where: Option[String] = None,
-    select: Seq[Column])
+    select: Seq[Column]) {
+
+  /**
+   * Returns a set of table properties describing this metric view's source and
+   * filter clauses. Mirrors the property keys used by the canonical metric view
+   * representation on other Spark platforms so consumers of the catalog see a
+   * consistent property layout.
+   *
+   * Note: `metric_view.from.sql` and `metric_view.where` values are truncated to
+   * [[Constants.MAXIMUM_PROPERTY_SIZE]] characters, so these are descriptive values
+   * for catalog browsers / lineage tooling -- not round-trippable representations
+   * of the source. Consumers that need the full SQL or filter expression for
+   * re-execution should read [[ViewInfo#queryText]] (the YAML body) and re-parse it
+   * rather than reconstruct the query from these properties; for any source whose
+   * SQL exceeds the size limit, this property would silently return a truncated
+   * string.
+   */
+  def getProperties: Map[String, String] = {
+    val base = Map(MetricView.PROP_FROM_TYPE -> from.sourceType.toString)
+    val fromProps = from match {
+      case asset: AssetSource =>
+        base + (MetricView.PROP_FROM_NAME -> asset.name)
+      case sql: SQLSource =>
+        base + (MetricView.PROP_FROM_SQL -> MetricView.truncate(sql.sql))
+    }
+    where.fold(fromProps)(w =>
+      fromProps + (MetricView.PROP_WHERE -> MetricView.truncate(w)))
+  }
+}
+
+private[sql] object MetricView {
+  final val PROP_FROM_TYPE = "metric_view.from.type"
+  final val PROP_FROM_NAME = "metric_view.from.name"
+  final val PROP_FROM_SQL = "metric_view.from.sql"
+  final val PROP_WHERE = "metric_view.where"
+
+  private def truncate(value: String): String =
+    value.take(Constants.MAXIMUM_PROPERTY_SIZE)
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/util/MetricViewPlanner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/util/MetricViewPlanner.scala
index 121d908eda90b..ed7713819c150 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/util/MetricViewPlanner.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/metricview/util/MetricViewPlanner.scala
@@ -24,24 +24,26 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.metricview.logical.MetricViewPlaceholder
-import org.apache.spark.sql.metricview.serde.{AssetSource, MetricView, MetricViewFactory, MetricViewValidationException, MetricViewYAMLParsingException, SQLSource}
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.metricview.logical.{DimensionInputColumn, InputColumn, MeasureInputColumn, MetricViewPlaceholder}
+import org.apache.spark.sql.metricview.serde.{AssetSource, DimensionExpression, JsonUtils, MeasureExpression, MetricView, MetricViewFactory, MetricViewValidationException, MetricViewYAMLParsingException, SQLSource}
+import org.apache.spark.sql.types.{Metadata, StructType}
 
 object MetricViewPlanner {
 
   def planWrite(
       metadata: CatalogTable,
       yaml: String,
-      sqlParser: ParserInterface): MetricViewPlaceholder = {
+      sqlParser: ParserInterface): (MetricViewPlaceholder, MetricView) = {
     val (metricView, dataModelPlan) = parseYAML(yaml, sqlParser)
-    MetricViewPlaceholder(
+    val inputColumns = buildInputColumns(metricView, sqlParser)
+    val placeholder = MetricViewPlaceholder(
       metadata,
-      metricView,
+      inputColumns,
       Seq.empty,
       dataModelPlan,
       isCreate = true
     )
+    (placeholder, metricView)
   }
 
   def planRead(
@@ -50,24 +52,50 @@ object MetricViewPlanner {
       sqlParser: ParserInterface,
       expectedSchema: StructType): MetricViewPlaceholder = {
     val (metricView, dataModelPlan) = parseYAML(yaml, sqlParser)
+    val inputColumns = buildInputColumns(metricView, sqlParser)
     MetricViewPlaceholder(
       metadata,
-      metricView,
+      inputColumns,
       DataTypeUtils.toAttributes(expectedSchema),
       dataModelPlan
     )
   }
 
+  /**
+   * Parses every column's `MeasureExpression` / `DimensionExpression` from the YAML descriptor
+   * into a typed [[InputColumn]] (with the SQL expression already parsed) so downstream
+   * resolution rules read a stable representation rather than re-parsing the YAML.
+   * Column metadata is converted once here from the canonical `ColumnMetadata` to Spark's
+   * `Metadata`, preserving the per-column annotations (e.g. dimension / measure type marker,
+   * source expression text) the resolver attaches to output attributes.
+   */
+  private def buildInputColumns(
+      metricView: MetricView,
+      sqlParser: ParserInterface): Seq[InputColumn] = {
+    metricView.select.map { col =>
+      val md = Metadata.fromJson(JsonUtils.toJson(col.getColumnMetadata))
+      col.expression match {
+        case DimensionExpression(expr) =>
+          DimensionInputColumn(col.name, sqlParser.parseExpression(expr), md)
+        case MeasureExpression(expr) =>
+          MeasureInputColumn(col.name, sqlParser.parseExpression(expr), md)
+      }
+    }
+  }
+
   private def parseYAML(
       yaml: String,
       sqlParser: ParserInterface): (MetricView, LogicalPlan) = {
     val metricView = try {
       MetricViewFactory.fromYAML(yaml)
     } catch {
+      // Both cases are user-correctable errors in the YAML body, not internal Spark bugs;
+      // surface them as `INVALID_METRIC_VIEW_YAML` AnalysisExceptions so the message is
+      // categorized as user input error rather than "please contact support".
       case e: MetricViewValidationException =>
-        throw QueryCompilationErrors.invalidLiteralForWindowDurationError()
+        throw QueryCompilationErrors.invalidMetricViewYamlError(e.getMessage, e)
       case e: MetricViewYAMLParsingException =>
-        throw QueryCompilationErrors.invalidLiteralForWindowDurationError()
+        throw QueryCompilationErrors.invalidMetricViewYamlError(e.getMessage, e)
     }
     val source = metricView.from match {
       case asset: AssetSource => UnresolvedRelation(sqlParser.parseMultipartIdentifier(asset.name))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
index 9f4ce6797cf84..2ecaf07efedf9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType._
 import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String}
+import org.apache.spark.unsafe.types.{BinaryView, UTF8String}
 
 class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
 
@@ -459,31 +459,45 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
   private val pointWkb: Array[Byte] = "010100000000000000000031400000000000001C40"
     .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
 
+  private val NDR: UTF8String = UTF8String.fromString("NDR")
+
   test("converting Geometry to GeometryType via convertToCatalyst") {
     val geom = Geometry.fromWKB(pointWkb, 0)
     val result = CatalystTypeConverters.convertToCatalyst(geom)
-    assert(result.isInstanceOf[GeometryVal])
-    val resultVal = result.asInstanceOf[GeometryVal]
-    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
-    assert(STUtils.stSrid(resultVal) === 0)
+    assert(result.isInstanceOf[BinaryView])
+    val resultVal = result.asInstanceOf[BinaryView]
+    assert(java.util.Arrays.equals(STUtils.stGeomAsBinary(resultVal, NDR), pointWkb))
+    assert(STUtils.stGeomSrid(resultVal) === 0)
   }
 
   test("converting Geometry with non-default SRID via convertToCatalyst") {
     val geom = Geometry.fromWKB(pointWkb, 4326)
     val result = CatalystTypeConverters.convertToCatalyst(geom)
-    assert(result.isInstanceOf[GeometryVal])
-    val resultVal = result.asInstanceOf[GeometryVal]
-    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
-    assert(STUtils.stSrid(resultVal) === 4326)
+    assert(result.isInstanceOf[BinaryView])
+    val resultVal = result.asInstanceOf[BinaryView]
+    assert(java.util.Arrays.equals(STUtils.stGeomAsBinary(resultVal, NDR), pointWkb))
+    assert(STUtils.stGeomSrid(resultVal) === 4326)
   }
 
   test("converting Geography to GeographyType via convertToCatalyst") {
     val geog = Geography.fromWKB(pointWkb, 4326)
     val result = CatalystTypeConverters.convertToCatalyst(geog)
-    assert(result.isInstanceOf[GeographyVal])
-    val resultVal = result.asInstanceOf[GeographyVal]
-    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
-    assert(STUtils.stSrid(resultVal) === 4326)
+    assert(result.isInstanceOf[BinaryView])
+    val resultVal = result.asInstanceOf[BinaryView]
+    assert(java.util.Arrays.equals(STUtils.stGeogAsBinary(resultVal, NDR), pointWkb))
+    assert(STUtils.stGeogSrid(resultVal) === 4326)
+  }
+
+  test("converting Geography with non-default SRID via convertToCatalyst") {
+    // Geography supports a variety of geographic SRIDs beyond the default 4326.
+    Seq(4267, 4269, 4612, 37001, 104030).foreach { srid =>
+      val geog = Geography.fromWKB(pointWkb, srid)
+      val result = CatalystTypeConverters.convertToCatalyst(geog)
+      assert(result.isInstanceOf[BinaryView])
+      val resultVal = result.asInstanceOf[BinaryView]
+      assert(java.util.Arrays.equals(STUtils.stGeogAsBinary(resultVal, NDR), pointWkb))
+      assert(STUtils.stGeogSrid(resultVal) === srid)
+    }
   }
 
   test("convertToCatalyst null handling for geospatial types") {
@@ -501,15 +515,28 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
       parameters = Map("srid" -> "1"))
   }
 
+  test("convertToCatalyst with Geography with invalid SRID") {
+    // Geography only accepts geographic SRIDs (e.g. 0 and 3857 are not geographic).
+    Seq(0, 1, 3857).foreach { invalidSrid =>
+      val geog = Geography.fromWKB(pointWkb, invalidSrid)
+      checkError(
+        exception = intercept[SparkIllegalArgumentException] {
+          CatalystTypeConverters.convertToCatalyst(geog)
+        },
+        condition = "ST_INVALID_SRID_VALUE",
+        parameters = Map("srid" -> invalidSrid.toString))
+    }
+  }
+
   test("createToCatalystConverter for GeometryType") {
     val gt = GeometryType(0)
     val converter = CatalystTypeConverters.createToCatalystConverter(gt)
     val geom = Geometry.fromWKB(pointWkb, 0)
     val result = converter(geom)
-    assert(result.isInstanceOf[GeometryVal])
-    val resultVal = result.asInstanceOf[GeometryVal]
-    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
-    assert(STUtils.stSrid(resultVal) === 0)
+    assert(result.isInstanceOf[BinaryView])
+    val resultVal = result.asInstanceOf[BinaryView]
+    assert(java.util.Arrays.equals(STUtils.stGeomAsBinary(resultVal, NDR), pointWkb))
+    assert(STUtils.stGeomSrid(resultVal) === 0)
   }
 
   test("createToCatalystConverter for GeographyType") {
@@ -517,10 +544,10 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val converter = CatalystTypeConverters.createToCatalystConverter(gt)
     val geog = Geography.fromWKB(pointWkb, 4326)
     val result = converter(geog)
-    assert(result.isInstanceOf[GeographyVal])
-    val resultVal = result.asInstanceOf[GeographyVal]
-    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
-    assert(STUtils.stSrid(resultVal) === 4326)
+    assert(result.isInstanceOf[BinaryView])
+    val resultVal = result.asInstanceOf[BinaryView]
+    assert(java.util.Arrays.equals(STUtils.stGeogAsBinary(resultVal, NDR), pointWkb))
+    assert(STUtils.stGeogSrid(resultVal) === 4326)
   }
 
   test("createToScalaConverter for GeometryType") {
@@ -579,9 +606,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val array = result.asInstanceOf[GenericArrayData]
     assert(array.numElements() === 1)
     val element = array.get(0, GeometryType("ANY"))
-    assert(element.isInstanceOf[GeometryVal])
+    assert(element.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+      STUtils.stGeomAsBinary(element.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geometry nested in Array") {
@@ -591,9 +618,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val array = result.asInstanceOf[GenericArrayData]
     assert(array.numElements() === 1)
     val element = array.get(0, GeometryType("ANY"))
-    assert(element.isInstanceOf[GeometryVal])
+    assert(element.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+      STUtils.stGeomAsBinary(element.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geometry nested in Map") {
@@ -602,9 +629,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     assert(result.isInstanceOf[ArrayBasedMapData])
     val mapData = result.asInstanceOf[ArrayBasedMapData]
     val value = mapData.valueArray.get(0, GeometryType("ANY"))
-    assert(value.isInstanceOf[GeometryVal])
+    assert(value.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(value.asInstanceOf[GeometryVal]), pointWkb))
+      STUtils.stGeomAsBinary(value.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geometry nested in Row") {
@@ -612,9 +639,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val result = CatalystTypeConverters.convertToCatalyst(Row(geom))
     assert(result.isInstanceOf[InternalRow])
     val element = result.asInstanceOf[InternalRow].get(0, GeometryType("ANY"))
-    assert(element.isInstanceOf[GeometryVal])
+    assert(element.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+      STUtils.stGeomAsBinary(element.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geography nested in Seq") {
@@ -624,9 +651,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val array = result.asInstanceOf[GenericArrayData]
     assert(array.numElements() === 1)
     val element = array.get(0, GeographyType("ANY"))
-    assert(element.isInstanceOf[GeographyVal])
+    assert(element.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+      STUtils.stGeogAsBinary(element.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geography nested in Array") {
@@ -636,9 +663,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val array = result.asInstanceOf[GenericArrayData]
     assert(array.numElements() === 1)
     val element = array.get(0, GeographyType("ANY"))
-    assert(element.isInstanceOf[GeographyVal])
+    assert(element.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+      STUtils.stGeogAsBinary(element.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geography nested in Map") {
@@ -647,9 +674,9 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     assert(result.isInstanceOf[ArrayBasedMapData])
     val mapData = result.asInstanceOf[ArrayBasedMapData]
     val value = mapData.valueArray.get(0, GeographyType("ANY"))
-    assert(value.isInstanceOf[GeographyVal])
+    assert(value.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(value.asInstanceOf[GeographyVal]), pointWkb))
+      STUtils.stGeogAsBinary(value.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 
   test("convertToCatalyst with Geography nested in Row") {
@@ -657,8 +684,8 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
     val result = CatalystTypeConverters.convertToCatalyst(Row(geog))
     assert(result.isInstanceOf[InternalRow])
     val element = result.asInstanceOf[InternalRow].get(0, GeographyType("ANY"))
-    assert(element.isInstanceOf[GeographyVal])
+    assert(element.isInstanceOf[BinaryView])
     assert(java.util.Arrays.equals(
-      STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+      STUtils.stGeogAsBinary(element.asInstanceOf[BinaryView], NDR), pointWkb))
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index ee644fc62a1ab..69f56bf53cd1a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{Count, Max}
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
-import org.apache.spark.sql.catalyst.plans.{AsOfJoinDirection, Cross, Inner, LeftOuter, RightOuter}
+import org.apache.spark.sql.catalyst.plans.{AsOfJoinDirection, Cross, Inner, LeftOuter, NearestBySimilarity, RightOuter}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.errors.DataTypeErrorsBase
 import org.apache.spark.sql.internal.SQLConf
@@ -924,6 +924,35 @@ class AnalysisErrorSuite extends AnalysisTest with DataTypeErrorsBase {
           |Conflicting attributes: "a".""".stripMargin))
   }
 
+  test("NearestByJoin with a streaming input fails analysis") {
+    val streamingLeft = LocalRelation(
+      Seq(AttributeReference("a", IntegerType)()), Nil, isStreaming = true)
+    val batchRight = LocalRelation(AttributeReference("b", IntegerType)())
+    val nearestBy = NearestByJoin(
+      streamingLeft, batchRight, Inner, approx = true, numResults = 1,
+      rankingExpression = streamingLeft.output.head + batchRight.output.head,
+      direction = NearestBySimilarity)
+    assertAnalysisErrorCondition(
+      nearestBy,
+      expectedErrorCondition = "NEAREST_BY_JOIN.STREAMING_NOT_SUPPORTED",
+      expectedMessageParameters = Map.empty)
+  }
+
+  test("NearestByJoin is rejected when spark.sql.crossJoin.enabled is false") {
+    val left = LocalRelation(AttributeReference("a", IntegerType)())
+    val right = LocalRelation(AttributeReference("b", IntegerType)())
+    val nearestBy = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 1,
+      rankingExpression = left.output.head + right.output.head,
+      direction = NearestBySimilarity)
+    withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
+      assertAnalysisErrorCondition(
+        nearestBy,
+        expectedErrorCondition = "NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED",
+        expectedMessageParameters = Map.empty)
+    }
+  }
+
   test("check grouping expression data types") {
     def checkDataType(dataType: DataType): Unit = {
       val plan =
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index e25777b230cef..4a7adcc050a00 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1764,6 +1764,12 @@ class AnalysisSuite extends AnalysisTest with Matchers {
     checkAnalysis(rel.select($"a"), rel.select(attr.markAsAllowAnyAccess()))
   }
 
+  test("SPARK-56714: __aggregated_access_only should not imply metadata column") {
+    val attr = $"a".int.markAsAggregatedAccessOnly()
+    assert(!attr.isMetadataCol)
+    assert(attr.aggregatedAccessOnly)
+  }
+
   test("SPARK-43030: deduplicate relations with duplicate aliases") {
     // Should not fail with the assertion failure: Found duplicate rewrite attributes.
     val alias = Alias($"a", "x")()
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalyzerExtensionPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalyzerExtensionPropagationSuite.scala
new file mode 100644
index 0000000000000..d3dab281a30c6
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalyzerExtensionPropagationSuite.scala
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.analysis.resolver.{ResolverExtension, TreeNodeResolver}
+import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.connector.catalog.{CatalogManager, DefaultCatalogManager}
+
+/**
+ * Verifies that [[Analyzer.withCatalogManager]] propagates all extension points.
+ *
+ * If this suite fails with an unexpected method count, a new extension point was added to
+ * [[Analyzer.withCatalogManager]] without being verified here. Add the corresponding assertion
+ * and update the expected count.
+ *
+ * If [[Analyzer]] gains a new extension point that is NOT yet in [[Analyzer.withCatalogManager]],
+ * add it there first, then update this suite.
+ */
+class AnalyzerExtensionPropagationSuite extends SparkFunSuite {
+
+  private val dummyRule: Rule[LogicalPlan] = new Rule[LogicalPlan] {
+    override def apply(plan: LogicalPlan): LogicalPlan = plan
+  }
+
+  private val dummyCheck: LogicalPlan => Unit = (_: LogicalPlan) => ()
+
+  private val dummyExtension: ResolverExtension = new ResolverExtension {
+    override def resolveOperator(
+        operator: LogicalPlan,
+        resolver: TreeNodeResolver[LogicalPlan, LogicalPlan]): Option[LogicalPlan] = None
+  }
+
+  private def newCatalogManager(): CatalogManager =
+    new DefaultCatalogManager(
+      FakeV2SessionCatalog,
+      new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry))
+
+  test("withCatalogManager propagates all extension points") {
+    // Counts every declared field on Analyzer (backing fields for vals,
+    // constructor params, and fields inherited from mixed-in traits). When this assertion fails,
+    // a field was added to or removed from Analyzer. If the change is a new extension point,
+    // add it to Analyzer.withCatalogManager, add an assertion in the clone checks below,
+    // and update EXPECTED_FIELD_COUNT.
+    val EXPECTED_FIELD_COUNT = 12
+    val analyzerFields = classOf[Analyzer].getDeclaredFields
+      .filterNot(f => f.isSynthetic || f.getName.contains("$"))
+    assert(analyzerFields.length == EXPECTED_FIELD_COUNT,
+      s"Analyzer has ${analyzerFields.length} declared fields " +
+      s"(${analyzerFields.map(_.getName).sorted.mkString(", ")}), " +
+      s"but expected $EXPECTED_FIELD_COUNT. " +
+      s"If a new extension point was added, register it in Analyzer.withCatalogManager, " +
+      s"add an assertion in this test, and update EXPECTED_FIELD_COUNT.")
+
+    val analyzer = new Analyzer(newCatalogManager()) {
+      override val hintResolutionRules: Seq[Rule[LogicalPlan]] = Seq(dummyRule)
+      override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq(dummyRule)
+      override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Seq(dummyRule)
+      override val extendedCheckRules: Seq[LogicalPlan => Unit] = Seq(dummyCheck)
+      override val singlePassResolverExtensions: Seq[ResolverExtension] = Seq(dummyExtension)
+      override val singlePassMetadataResolverExtensions: Seq[ResolverExtension] =
+        Seq(dummyExtension)
+      override val singlePassPostHocResolutionRules: Seq[Rule[LogicalPlan]] = Seq(dummyRule)
+      override val singlePassExtendedResolutionChecks: Seq[LogicalPlan => Unit] = Seq(dummyCheck)
+    }
+
+    val clone = analyzer.withCatalogManager(newCatalogManager())
+
+    assert(clone.hintResolutionRules eq analyzer.hintResolutionRules)
+    assert(clone.extendedResolutionRules eq analyzer.extendedResolutionRules)
+    assert(clone.postHocResolutionRules eq analyzer.postHocResolutionRules)
+    assert(clone.extendedCheckRules eq analyzer.extendedCheckRules)
+    assert(clone.singlePassResolverExtensions eq analyzer.singlePassResolverExtensions)
+    assert(clone.singlePassMetadataResolverExtensions eq
+      analyzer.singlePassMetadataResolverExtensions)
+    assert(clone.singlePassPostHocResolutionRules eq analyzer.singlePassPostHocResolutionRules)
+    assert(clone.singlePassExtendedResolutionChecks eq analyzer.singlePassExtendedResolutionChecks)
+
+    // Verify the clone's anonymous class overrides exactly the expected extension points.
+    // If this assertion fails, withCatalogManager was updated but this test was not.
+    // Add the corresponding assert above and update the expected set.
+    val overriddenMethods = clone.getClass.getDeclaredMethods
+      .filterNot(m => m.isSynthetic || m.isBridge || m.getName.contains("$"))
+      .map(_.getName)
+      .toSet
+
+    val expectedExtensions = Set(
+      "hintResolutionRules",
+      "extendedResolutionRules",
+      "postHocResolutionRules",
+      "extendedCheckRules",
+      "singlePassResolverExtensions",
+      "singlePassMetadataResolverExtensions",
+      "singlePassPostHocResolutionRules",
+      "singlePassExtendedResolutionChecks"
+    )
+
+    assert(overriddenMethods == expectedExtensions,
+      s"withCatalogManager does not copy the expected set of extension points. " +
+      s"Missing from withCatalogManager: ${expectedExtensions -- overriddenMethods}. " +
+      s"Unexpected overrides: ${overriddenMethods -- expectedExtensions}.")
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
index fa5027ce259d5..1f415c5ede44b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
@@ -88,6 +88,7 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase {
     shouldCast(checkedType, DecimalType, DecimalType.SYSTEM_DEFAULT)
     shouldCast(checkedType, NumericType, NumericType.defaultConcreteType)
     shouldCast(checkedType, AnyTimestampType, AnyTimestampType.defaultConcreteType)
+    shouldCast(checkedType, AnyTimeType, AnyTimeType.defaultConcreteType)
     shouldNotCast(checkedType, IntegralType)
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ChangelogInfoUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ChangelogContextUtilsSuite.scala
similarity index 80%
rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ChangelogInfoUtilsSuite.scala
rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ChangelogContextUtilsSuite.scala
index 312754fa24dd9..93bab0009d67e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ChangelogInfoUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ChangelogContextUtilsSuite.scala
@@ -22,11 +22,11 @@ import scala.jdk.CollectionConverters._
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.connector.catalog.{ChangelogInfo, ChangelogRange}
+import org.apache.spark.sql.connector.catalog.{ChangelogContext, ChangelogRange}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
-class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
+class ChangelogContextUtilsSuite extends SparkFunSuite with SQLHelper {
 
   private val testTimeZone = "UTC"
 
@@ -35,7 +35,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("version range with both start and end") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("startingVersion" -> "1", "endingVersion" -> "5"), testTimeZone)
     val range = info.range().asInstanceOf[ChangelogRange.VersionRange]
     assert(range.startingVersion() == "1")
@@ -45,7 +45,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("version range with only start") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("startingVersion" -> "10"), testTimeZone)
     val range = info.range().asInstanceOf[ChangelogRange.VersionRange]
     assert(range.startingVersion() == "10")
@@ -55,14 +55,14 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   test("version range - endingVersion without startingVersion throws") {
     checkError(
       intercept[AnalysisException] {
-        ChangelogInfoUtils.fromOptions(
+        ChangelogContextUtils.fromOptions(
           makeOptions("endingVersion" -> "5"), testTimeZone)
       },
       condition = "INVALID_CDC_OPTION.MISSING_STARTING_VERSION")
   }
 
   test("timestamp range with both start and end") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("startingTimestamp" -> "2026-01-01", "endingTimestamp" -> "2026-02-01"),
       testTimeZone)
     val range = info.range().asInstanceOf[ChangelogRange.TimestampRange]
@@ -72,7 +72,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("timestamp range with only start") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("startingTimestamp" -> "2026-01-01"), testTimeZone)
     val range = info.range().asInstanceOf[ChangelogRange.TimestampRange]
     assert(!range.endingTimestamp().isPresent)
@@ -81,7 +81,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   test("timestamp range - endingTimestamp without startingTimestamp throws") {
     checkError(
       intercept[AnalysisException] {
-        ChangelogInfoUtils.fromOptions(
+        ChangelogContextUtils.fromOptions(
           makeOptions("endingTimestamp" -> "2026-02-01"), testTimeZone)
       },
       condition = "INVALID_CDC_OPTION.MISSING_STARTING_TIMESTAMP")
@@ -90,7 +90,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   test("cannot mix version and timestamp range") {
     checkError(
       intercept[AnalysisException] {
-        ChangelogInfoUtils.fromOptions(
+        ChangelogContextUtils.fromOptions(
           makeOptions("startingVersion" -> "1", "startingTimestamp" -> "2026-01-01"),
           testTimeZone)
       },
@@ -98,37 +98,37 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("unbounded range when no version or timestamp specified") {
-    val info = ChangelogInfoUtils.fromOptions(makeOptions(), testTimeZone)
+    val info = ChangelogContextUtils.fromOptions(makeOptions(), testTimeZone)
     assert(info.range().isInstanceOf[ChangelogRange.UnboundedRange])
   }
 
   test("deduplication mode - none") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("deduplicationMode" -> "none"), testTimeZone)
-    assert(info.deduplicationMode() == ChangelogInfo.DeduplicationMode.NONE)
+    assert(info.deduplicationMode() == ChangelogContext.DeduplicationMode.NONE)
   }
 
   test("deduplication mode - dropCarryovers (default)") {
-    val info = ChangelogInfoUtils.fromOptions(makeOptions(), testTimeZone)
-    assert(info.deduplicationMode() == ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    val info = ChangelogContextUtils.fromOptions(makeOptions(), testTimeZone)
+    assert(info.deduplicationMode() == ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
   }
 
   test("deduplication mode - netChanges") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("deduplicationMode" -> "netChanges"), testTimeZone)
-    assert(info.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES)
+    assert(info.deduplicationMode() == ChangelogContext.DeduplicationMode.NET_CHANGES)
   }
 
   test("deduplication mode - case insensitive") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("deduplicationMode" -> "DROPCARRYOVERS"), testTimeZone)
-    assert(info.deduplicationMode() == ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    assert(info.deduplicationMode() == ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
   }
 
   test("deduplication mode - invalid value throws") {
     checkError(
       intercept[AnalysisException] {
-        ChangelogInfoUtils.fromOptions(
+        ChangelogContextUtils.fromOptions(
           makeOptions("deduplicationMode" -> "invalid"), testTimeZone)
       },
       condition = "INVALID_CDC_OPTION.INVALID_DEDUPLICATION_MODE",
@@ -136,18 +136,18 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("computeUpdates option") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions("computeUpdates" -> "true"), testTimeZone)
     assert(info.computeUpdates())
   }
 
   test("computeUpdates defaults to false") {
-    val info = ChangelogInfoUtils.fromOptions(makeOptions(), testTimeZone)
+    val info = ChangelogContextUtils.fromOptions(makeOptions(), testTimeZone)
     assert(!info.computeUpdates())
   }
 
   test("bound inclusivity options") {
-    val info = ChangelogInfoUtils.fromOptions(
+    val info = ChangelogContextUtils.fromOptions(
       makeOptions(
         "startingVersion" -> "1",
         "endingVersion" -> "5",
@@ -162,7 +162,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
   test("invalid timestamp throws") {
     checkError(
       intercept[AnalysisException] {
-        ChangelogInfoUtils.fromOptions(
+        ChangelogContextUtils.fromOptions(
           makeOptions("startingTimestamp" -> "not-a-timestamp"), testTimeZone)
       },
       condition = "INVALID_CDC_OPTION.INVALID_TIMESTAMP",
@@ -177,7 +177,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
     // = 2026-01-01 08:00:00 UTC = expectedUtcMicros + 8h
     val expectedPstMicros = 1767254400000000L
 
-    val utcInfo = ChangelogInfoUtils.fromOptions(
+    val utcInfo = ChangelogContextUtils.fromOptions(
       makeOptions("startingTimestamp" -> tsStr), "UTC")
     val utcRange =
       utcInfo.range().asInstanceOf[ChangelogRange.TimestampRange]
@@ -185,7 +185,7 @@ class ChangelogInfoUtilsSuite extends SparkFunSuite with SQLHelper {
 
     withSQLConf(
         SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") {
-      val laInfo = ChangelogInfoUtils.fromOptions(
+      val laInfo = ChangelogContextUtils.fromOptions(
         makeOptions("startingTimestamp" -> tsStr),
         SQLConf.get.sessionLocalTimeZone)
       val laRange =
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala
index 183d37f972a2e..93906c7565701 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog,
 import org.apache.spark.sql.catalyst.expressions.{Alias, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.connector.catalog.{CatalogManager, FunctionCatalog, Identifier}
+import org.apache.spark.sql.connector.catalog.{CatalogManager, DefaultCatalogManager, FunctionCatalog, Identifier}
 import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -45,7 +45,7 @@ class LookupFunctionsSuite extends PlanTest {
           CatalogDatabase("db1", "", new URI("loc2"), Map.empty),
           ignoreIfExists = false)
         val catalog = new SessionCatalog(externalCatalog, new SimpleFunctionRegistry)
-        val catalogManager = new CatalogManager(new CustomV2SessionCatalog(catalog), catalog)
+        val catalogManager = new DefaultCatalogManager(new CustomV2SessionCatalog(catalog), catalog)
         catalogManager.setCurrentNamespace(Array("db1"))
         try {
           val analyzer = new Analyzer(catalogManager)
@@ -75,7 +75,7 @@ class LookupFunctionsSuite extends PlanTest {
   test("SPARK-23486: the getFunction for the Persistent function check") {
     val externalCatalog = new CustomInMemoryCatalog
     val catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin.clone())
-    val catalogManager = new CatalogManager(new CustomV2SessionCatalog(catalog), catalog)
+    val catalogManager = new DefaultCatalogManager(new CustomV2SessionCatalog(catalog), catalog)
     val analyzer = {
       catalog.createDatabase(
         CatalogDatabase("default", "", new URI("loc"), Map.empty),
@@ -100,7 +100,7 @@ class LookupFunctionsSuite extends PlanTest {
     val externalCatalog = new InMemoryCatalog
     val customerFunctionReg = new CustomerFunctionRegistry
     val catalog = new SessionCatalog(externalCatalog, customerFunctionReg)
-    val catalogManager = new CatalogManager(new CustomV2SessionCatalog(catalog), catalog)
+    val catalogManager = new DefaultCatalogManager(new CustomV2SessionCatalog(catalog), catalog)
     val analyzer = {
       catalog.createDatabase(
         CatalogDatabase("default", "", new URI("loc"), Map.empty),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala
index 9685ed5c6d256..75846aa49616c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.connector.catalog.{CatalogManager, Identifier, InMemoryTable, InMemoryTableCatalog, Table}
 import org.apache.spark.sql.connector.catalog.TableWritePrivilege
 import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 class TableLookupCacheSuite extends AnalysisTest with Matchers {
@@ -74,6 +75,17 @@ class TableLookupCacheSuite extends AnalysisTest with Matchers {
     when(catalogManager.v1SessionCatalog).thenReturn(v1Catalog)
     when(catalogManager.currentCatalog).thenReturn(v2Catalog)
     when(catalogManager.currentNamespace).thenReturn(Array("default"))
+    when(catalogManager.sessionPathEntries).thenReturn(None)
+    val defaultPath = SQLConf.get.resolutionSearchPath(
+      (v2Catalog.name() +: Array("default")).toSeq)
+    when(catalogManager.sqlResolutionPathEntries(
+      any[String], any[Seq[String]], any[String], any[Seq[String]]))
+      .thenReturn(defaultPath)
+    when(catalogManager.sqlResolutionPathEntries(any[String], any[Seq[String]]))
+      .thenReturn(defaultPath)
+    when(catalogManager.resolutionPathEntriesForAnalysis(
+      any[Option[Seq[Seq[String]]]], any[Seq[String]]))
+      .thenReturn(defaultPath)
 
     new Analyzer(catalogManager)
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index e6a9690ad7570..c59b687dc6ed5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -217,6 +217,15 @@ abstract class TypeCoercionSuiteBase extends AnalysisTest {
     shouldNotCast(checkedType, IntegralType)
   }
 
+  test("SPARK-56152: implicit type cast - TimeType") {
+    val checkedType = TimeType()
+    checkTypeCasting(checkedType, castableTypes = Seq(checkedType, StringType) ++ datetimeTypes)
+    shouldCast(checkedType, AnyTimeType, AnyTimeType.defaultConcreteType)
+    shouldNotCast(checkedType, DecimalType)
+    shouldNotCast(checkedType, NumericType)
+    shouldNotCast(checkedType, IntegralType)
+  }
+
   test("implicit type cast between two Map types") {
     val sourceType = MapType(IntegerType, IntegerType, true)
     val castableTypes = numericTypes ++ Seq(StringType).filter(!Cast.forceNullable(IntegerType, _))
@@ -523,6 +532,7 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase {
     shouldCast(checkedType, DecimalType, DecimalType.SYSTEM_DEFAULT)
     shouldCast(checkedType, NumericType, NumericType.defaultConcreteType)
     shouldCast(checkedType, AnyTimestampType, AnyTimestampType.defaultConcreteType)
+    shouldCast(checkedType, AnyTimeType, AnyTimeType.defaultConcreteType)
     shouldNotCast(checkedType, IntegralType)
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala
index 8897d65654540..f54ab9e4e0ddd 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala
@@ -37,7 +37,9 @@ class TimezoneAwareExpressionResolverSuite extends SparkFunSuite {
       extends ExpressionResolver(
         resolver = new Resolver(catalogManager),
         functionResolution =
-          new FunctionResolution(catalogManager, Resolver.createRelationResolution(catalogManager)),
+          new FunctionResolution(
+            catalogManager,
+            Resolver.createRelationResolution(catalogManager)),
         planLogger = new PlanLogger
       ) {
     override def resolve(expression: Expression): Expression = resolvedExpression
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala
new file mode 100644
index 0000000000000..0ed3bcfb19639
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.catalog
+
+import org.json4s.JsonAST.{JArray, JObject, JString}
+import org.json4s.jackson.JsonMethods.{compact, render}
+
+import org.apache.spark.SparkFunSuite
+
+/**
+ * Unit tests for [[SqlPathFormat]] -- the helper that converts the raw JSON-array-of-arrays
+ * path stored on view / SQL function metadata into the JSON-object form used by DESCRIBE
+ * AS JSON and the human-readable form used by DESCRIBE EXTENDED.
+ */
+class SqlPathFormatSuite extends SparkFunSuite {
+
+  private def compactJson(v: JArray): String = compact(render(v))
+
+  test("toDescribeJson: maps each [catalog, ns...] entry to a JSON object") {
+    val stored =
+      """[["spark_catalog","default"],["system","builtin"]]"""
+    val result = SqlPathFormat.toDescribeJson(stored)
+      .getOrElse(fail(s"Expected a JSON value, got None for: $stored"))
+    val expected = JArray(List(
+      JObject("catalog_name" -> JString("spark_catalog"),
+        "namespace" -> JArray(List(JString("default")))),
+      JObject("catalog_name" -> JString("system"),
+        "namespace" -> JArray(List(JString("builtin"))))))
+    assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected))
+  }
+
+  test("toDescribeJson: multi-level namespace becomes [head, tail...]") {
+    val stored = """[["cat1","db","sub"]]"""
+    val result = SqlPathFormat.toDescribeJson(stored)
+      .getOrElse(fail("Expected a JSON value"))
+    val expected = JArray(List(
+      JObject("catalog_name" -> JString("cat1"),
+        "namespace" -> JArray(List(JString("db"), JString("sub"))))))
+    assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected))
+  }
+
+  test("toDescribeJson: empty array returns None") {
+    assert(SqlPathFormat.toDescribeJson("[]").isEmpty)
+  }
+
+  test("toDescribeJson: malformed payloads return None") {
+    Seq(
+      "",
+      "not_json",
+      "{}",
+      """{"foo":1}""",
+      """[1, 2, 3]"""
+    ).foreach { payload =>
+      assert(SqlPathFormat.toDescribeJson(payload).isEmpty, s"payload=$payload")
+    }
+  }
+
+  test("formatForDisplay: renders plain identifiers without backticks") {
+    val json = SqlPathFormat.toDescribeJson(
+      """[["spark_catalog","default"],["system","builtin"]]""")
+      .getOrElse(fail("Expected a JSON value"))
+    val rendered = SqlPathFormat.formatForDisplay(json)
+      .getOrElse(fail("Expected a display string"))
+    assert(rendered == "spark_catalog.default, system.builtin")
+  }
+
+  test("formatForDisplay: backticks identifiers that need quoting") {
+    val json = SqlPathFormat.toDescribeJson(
+      """[["spark_catalog","weird.schema"]]""")
+      .getOrElse(fail("Expected a JSON value"))
+    val rendered = SqlPathFormat.formatForDisplay(json)
+      .getOrElse(fail("Expected a display string"))
+    assert(rendered == "spark_catalog.`weird.schema`")
+  }
+
+  test("formatForDisplay: round-trips multi-level namespaces") {
+    val json = SqlPathFormat.toDescribeJson("""[["cat","db","ns"]]""")
+      .getOrElse(fail("Expected a JSON value"))
+    val rendered = SqlPathFormat.formatForDisplay(json)
+      .getOrElse(fail("Expected a display string"))
+    assert(rendered == "cat.db.ns")
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
index e8da77e834333..0cf269b8360e6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
@@ -962,6 +962,33 @@ class CollectionExpressionsSuite
       Some(Literal.create(null, StringType))), null)
   }
 
+  test("ArrayJoin codegen with non-nullable array/delimiter and nullable " +
+    "nullReplacement") {
+    // When an upstream IsNotNull filter tightens the array and delimiter to
+    // non-nullable but the nullReplacement is a nullable column, ArrayJoin.nullable is true so
+    // doGenCode initializes ev.isNull = true. The non-nullable branch of
+    // genCodeForArrayAndDelimiter must still reset ev.isNull = false, otherwise codegen builds the
+    // joined string but discards it as NULL while interpreted eval() returns the correct result.
+    val arr = BoundReference(0, ArrayType(StringType, containsNull = true), nullable = false)
+    val delimiter = BoundReference(1, StringType, nullable = false)
+    val nullReplacement = BoundReference(2, StringType, nullable = true)
+    val arrayJoin = ArrayJoin(arr, delimiter, Some(nullReplacement))
+    // ArrayJoin is nullable only because nullReplacement is nullable.
+    assert(arrayJoin.nullable)
+
+    // Non-null replacement: NULL array elements are replaced and a joined string is produced.
+    checkEvaluation(
+      arrayJoin,
+      "a,NR,b",
+      create_row(Seq[String]("a", null, "b"), ",", "NR"))
+
+    // Null replacement value: the whole result is NULL, matching eval().
+    checkEvaluation(
+      arrayJoin,
+      null,
+      create_row(Seq[String]("a", null, "b"), ",", null))
+  }
+
   test("ArraysZip") {
     val literals = Seq(
       Literal.create(Seq(9001, 9002, 9003, null), ArrayType(IntegerType)),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 540c9830deb44..4a2b23fe059ba 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -29,7 +29,9 @@ import scala.reflect.ClassTag
 import scala.util.Random
 
 import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkUpgradeException}
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
@@ -851,6 +853,21 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     }
   }
 
+  test("TruncTimestamp of Long.MinValue overflows with ArithmeticException") {
+    withDefaultTimeZone(UTC) {
+      // Long.MinValue is the smallest representable timestamp value (in micros). Truncating it
+      // rounds the value down to an earlier instant, which falls below the representable micros
+      // range. The overflow must surface as an ArithmeticException instead of silently wrapping
+      // around to a bogus (positive) timestamp.
+      val minTimestamp = Literal.create(Long.MinValue, TimestampType)
+      Seq("YEAR", "QUARTER", "MONTH", "WEEK", "DAY", "HOUR", "MINUTE",
+          "SECOND", "MILLISECOND").foreach { fmt =>
+        checkExceptionInExpression[ArithmeticException](
+          TruncTimestamp(Literal.create(fmt, StringType), minTimestamp), "")
+      }
+    }
+  }
+
   test("unsupported fmt fields for trunc/date_trunc results null") {
     Seq("INVALID", "decade", "century", "millennium", "whatever", null).foreach { field =>
       testTruncDate(Date.valueOf("2000-03-08"), field, null)
@@ -2313,4 +2330,319 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       null
     )
   }
+
+  test("time_bucket: day-time interval") {
+    // Pin session zone to UTC so the whole-day TIMESTAMP (LTZ) case is deterministic; the
+    // session-zone behavior is exercised by the dedicated test below.
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+      sdf.setTimeZone(TimeZone.getTimeZone(UTC))
+      Seq(TimestampType, TimestampNTZType).foreach { dt =>
+        // 15-minute bucket with epoch origin
+        checkEvaluation(
+          TimeBucket(
+            Literal(Duration.ofMinutes(15)),
+            timestampLiteral("2024-01-01 11:27:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          timestampAnswer("2024-01-01 11:15:00.000", sdf, dt))
+        // 1-hour bucket with custom origin (:05 alignment)
+        checkEvaluation(
+          TimeBucket(
+            Literal(Duration.ofHours(1)),
+            timestampLiteral("2024-01-01 11:27:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:05:00.000", sdf, dt)),
+          timestampAnswer("2024-01-01 11:05:00.000", sdf, dt))
+        // Pre-epoch ts
+        checkEvaluation(
+          TimeBucket(
+            Literal(Duration.ofDays(1)),
+            timestampLiteral("1969-12-31 23:30:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          timestampAnswer("1969-12-31 00:00:00.000", sdf, dt))
+        // NULL ts -> NULL
+        checkEvaluation(
+          TimeBucket(
+            Literal(Duration.ofHours(1)),
+            Literal.create(null, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          null)
+        // NULL bucketSize -> NULL
+        checkEvaluation(
+          TimeBucket(
+            Literal.create(null, DayTimeIntervalType()),
+            timestampLiteral("2024-01-01 11:27:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          null)
+        // NULL origin -> NULL
+        checkEvaluation(
+          TimeBucket(
+            Literal(Duration.ofHours(1)),
+            timestampLiteral("2024-01-01 11:27:00.000", sdf, dt),
+            Literal.create(null, dt)),
+          null)
+      }
+    }
+  }
+
+  test("time_bucket: day-time interval honors session time zone for TIMESTAMP") {
+    // For TIMESTAMP (LTZ), the calendar-day component of a day-time bucket aligns to
+    // the session time zone. Sub-day remainders are zone-independent.
+    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+    sdf.setTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") {
+      val laOriginMicros = DateTimeUtils.daysToMicros(0, getZoneId("America/Los_Angeles"))
+      val laOrigin = Literal(laOriginMicros, TimestampType)
+
+      // Winter ts in LA: 2024-02-15 10:00 PST. Bucket = 2024-02-15 00:00 PST.
+      checkEvaluation(
+        TimeBucket(
+          Literal(Duration.ofDays(1)),
+          timestampLiteral("2024-02-15 10:00:00.000", sdf, TimestampType),
+          laOrigin),
+        timestampAnswer("2024-02-15 00:00:00.000", sdf, TimestampType))
+
+      // Spring-forward day in LA (2024-03-10): the day has only 23 UTC hours but the
+      // bucket still covers the local calendar day.
+      checkEvaluation(
+        TimeBucket(
+          Literal(Duration.ofDays(1)),
+          timestampLiteral("2024-03-10 12:00:00.000", sdf, TimestampType),
+          laOrigin),
+        timestampAnswer("2024-03-10 00:00:00.000", sdf, TimestampType))
+
+      // Summer ts in LA: 2024-07-15 10:00 PDT. Bucket = 2024-07-15 00:00 PDT.
+      checkEvaluation(
+        TimeBucket(
+          Literal(Duration.ofDays(1)),
+          timestampLiteral("2024-07-15 10:00:00.000", sdf, TimestampType),
+          laOrigin),
+        timestampAnswer("2024-07-15 00:00:00.000", sdf, TimestampType))
+
+      // Fall-back day in LA (2024-11-03): the day spans 25 UTC hours but the bucket
+      // still covers the local calendar day. ts at 18:00 PST buckets to 00:00 PDT.
+      checkEvaluation(
+        TimeBucket(
+          Literal(Duration.ofDays(1)),
+          timestampLiteral("2024-11-03 18:00:00.000", sdf, TimestampType),
+          laOrigin),
+        timestampAnswer("2024-11-03 00:00:00.000", sdf, TimestampType))
+
+      // Compound DT (36h = 1 day + 12h) across spring-forward: exercises the
+      // estimate-and-adjust step-forward path. Origin = 2024-03-08 00:00 PST; ts on the
+      // 4th bucket boundary lands 1 hour later than a UTC-linear estimate would predict.
+      val springOrigin = Literal(
+        DateTimeUtils.daysToMicros(
+          java.time.LocalDate.of(2024, 3, 8).toEpochDay.toInt, getZoneId("America/Los_Angeles")),
+        TimestampType)
+      checkEvaluation(
+        TimeBucket(
+          Literal(Duration.ofHours(36)),
+          timestampLiteral("2024-03-12 12:00:00.000", sdf, TimestampType),
+          springOrigin),
+        timestampAnswer("2024-03-12 12:00:00.000", sdf, TimestampType))
+
+      // Compound DT (36h) across fall-back: exercises step-back. Origin = 2024-11-01
+      // 00:00 PDT; ts at 2024-11-05 11:30 PST buckets back to 2024-11-04 00:00 PST
+      // (= origin + INTERVAL '72' HOUR; linear estimate would land on 2024-11-05 11:00).
+      val fallOrigin = Literal(
+        DateTimeUtils.daysToMicros(
+          java.time.LocalDate.of(2024, 11, 1).toEpochDay.toInt, getZoneId("America/Los_Angeles")),
+        TimestampType)
+      checkEvaluation(
+        TimeBucket(
+          Literal(Duration.ofHours(36)),
+          timestampLiteral("2024-11-05 11:30:00.000", sdf, TimestampType),
+          fallOrigin),
+        timestampAnswer("2024-11-04 00:00:00.000", sdf, TimestampType))
+    }
+  }
+
+  test("time_bucket: year-month interval") {
+    // Pin session zone to UTC; the LTZ session-zone behavior is covered by the test below.
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+      sdf.setTimeZone(TimeZone.getTimeZone(UTC))
+      Seq(TimestampType, TimestampNTZType).foreach { dt =>
+        // 1-month bucket
+        checkEvaluation(
+          TimeBucket(
+            Literal(Period.ofMonths(1)),
+            timestampLiteral("2024-03-15 11:27:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          timestampAnswer("2024-03-01 00:00:00.000", sdf, dt))
+        // 3-month (quarterly) bucket
+        checkEvaluation(
+          TimeBucket(
+            Literal(Period.ofMonths(3)),
+            timestampLiteral("2024-05-15 10:00:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          timestampAnswer("2024-04-01 00:00:00.000", sdf, dt))
+        // End-of-month capping with step-back: origin on 1970-01-31, 1-month bucket,
+        // ts in early March of a leap year -> 2024-02-29.
+        checkEvaluation(
+          TimeBucket(
+            Literal(Period.ofMonths(1)),
+            timestampLiteral("2024-03-01 12:00:00.000", sdf, dt),
+            timestampLiteral("1970-01-31 00:00:00.000", sdf, dt)),
+          timestampAnswer("2024-02-29 00:00:00.000", sdf, dt))
+        // NULL bucketSize (YM) -> NULL
+        checkEvaluation(
+          TimeBucket(
+            Literal.create(null, YearMonthIntervalType()),
+            timestampLiteral("2024-03-15 11:27:00.000", sdf, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          null)
+        // NULL ts (YM) -> NULL
+        checkEvaluation(
+          TimeBucket(
+            Literal(Period.ofMonths(1)),
+            Literal.create(null, dt),
+            timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)),
+          null)
+        // NULL origin (YM) -> NULL
+        checkEvaluation(
+          TimeBucket(
+            Literal(Period.ofMonths(1)),
+            timestampLiteral("2024-03-15 11:27:00.000", sdf, dt),
+            Literal.create(null, dt)),
+          null)
+      }
+    }
+  }
+
+  test("time_bucket: year-month interval honors session time zone for TIMESTAMP") {
+    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+    sdf.setTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") {
+      val laOriginMicros = DateTimeUtils.daysToMicros(0, getZoneId("America/Los_Angeles"))
+      val laOrigin = Literal(laOriginMicros, TimestampType)
+
+      // Winter ts in LA: 2024-02-15 10:00 PST. Bucket = 2024-02-01 00:00 PST.
+      checkEvaluation(
+        TimeBucket(
+          Literal(Period.ofMonths(1)),
+          timestampLiteral("2024-02-15 10:00:00.000", sdf, TimestampType),
+          laOrigin),
+        timestampAnswer("2024-02-01 00:00:00.000", sdf, TimestampType))
+
+      // Summer ts in LA: 2024-07-15 10:00 PDT. Bucket = 2024-07-01 00:00 PDT.
+      checkEvaluation(
+        TimeBucket(
+          Literal(Period.ofMonths(1)),
+          timestampLiteral("2024-07-15 10:00:00.000", sdf, TimestampType),
+          laOrigin),
+        timestampAnswer("2024-07-01 00:00:00.000", sdf, TimestampType))
+    }
+  }
+
+  test("time_bucket: ExpressionBuilder") {
+    // Pin session zone to UTC so the LTZ default origin resolves to 0L. The non-UTC case
+    // is covered by the dedicated test below.
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+      sdf.setTimeZone(TimeZone.getTimeZone(UTC))
+      val hour = Literal(Duration.ofHours(1))
+      val ts = timestampLiteral("2024-01-01 11:27:00.000", sdf, TimestampType)
+      val tsNtz = timestampLiteral("2024-01-01 11:27:00.000", sdf, TimestampNTZType)
+      val ntzOrigin = timestampLiteral("1970-01-01 00:00:00.000", sdf, TimestampNTZType)
+
+      // 2-arg: default origin is epoch with ts's type (TIMESTAMP)
+      val built1 = TimeBucketExpressionBuilder.build("time_bucket", Seq(hour, ts))
+        .asInstanceOf[TimeBucket]
+      assert(built1.originTs == Literal(0L, TimestampType))
+
+      // 2-arg with TIMESTAMP_NTZ ts: default origin is epoch with TIMESTAMP_NTZ
+      val built2 = TimeBucketExpressionBuilder.build("time_bucket", Seq(hour, tsNtz))
+        .asInstanceOf[TimeBucket]
+      assert(built2.originTs == Literal(0L, TimestampNTZType))
+
+      // NULL ts + TIMESTAMP_NTZ origin: ts retyped to TIMESTAMP_NTZ to match origin
+      val built3 = TimeBucketExpressionBuilder.build(
+        "time_bucket", Seq(hour, Literal(null, NullType), ntzOrigin))
+        .asInstanceOf[TimeBucket]
+      assert(built3.ts.dataType == TimestampNTZType)
+
+      // NULL origin + TIMESTAMP_NTZ ts: origin retyped to TIMESTAMP_NTZ to match ts
+      val built4 = TimeBucketExpressionBuilder.build(
+        "time_bucket", Seq(hour, tsNtz, Literal(null, NullType)))
+        .asInstanceOf[TimeBucket]
+      assert(built4.originTs.dataType == TimestampNTZType)
+
+      // Bare NULL as bucketSize: retyped to DayTimeIntervalType
+      val built5 = TimeBucketExpressionBuilder.build(
+        "time_bucket", Seq(Literal(null, NullType), ts))
+        .asInstanceOf[TimeBucket]
+      assert(built5.bucketSize.dataType == DayTimeIntervalType())
+
+      // Wrong arg count
+      intercept[AnalysisException] {
+        TimeBucketExpressionBuilder.build("time_bucket", Seq(hour))
+      }
+      intercept[AnalysisException] {
+        TimeBucketExpressionBuilder.build("time_bucket", Seq(hour, ts, ts, ts))
+      }
+    }
+  }
+
+  test("time_bucket: ExpressionBuilder in non-UTC session") {
+    // In a non-UTC session, the 2-arg form's default origin shifts to the UTC instant of
+    // local 1970-01-01 00:00 so monthly/yearly buckets land at local calendar boundaries.
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") {
+      val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+      sdf.setTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))
+      val ts = timestampLiteral("2024-07-15 10:00:00.000", sdf, TimestampType)
+      val tsNtz = timestampLiteral("2024-07-15 10:00:00.000", sdf, TimestampNTZType)
+      val month = Literal(Period.ofMonths(1))
+
+      // LTZ: default origin is local 1970-01-01 00:00 PST (= 28800000000L UTC micros).
+      val builtLtz = TimeBucketExpressionBuilder.build("time_bucket", Seq(month, ts))
+        .asInstanceOf[TimeBucket]
+      val expectedOriginMicros =
+        DateTimeUtils.daysToMicros(0, getZoneId("America/Los_Angeles"))
+      assert(builtLtz.originTs == Literal(expectedOriginMicros, TimestampType))
+
+      // NTZ: default origin still 0L wall-clock (session zone irrelevant).
+      val builtNtz = TimeBucketExpressionBuilder.build("time_bucket", Seq(month, tsNtz))
+        .asInstanceOf[TimeBucket]
+      assert(builtNtz.originTs == Literal(0L, TimestampNTZType))
+    }
+  }
+
+  test("time_bucket: checkInputDataTypes") {
+    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US)
+    sdf.setTimeZone(TimeZone.getTimeZone(UTC))
+    val tsLit = timestampLiteral("2024-01-01 00:00:00.000", sdf, TimestampType)
+    val originLit = tsLit
+    val hour = Literal(Duration.ofHours(1))
+
+    // Non-foldable bucketSize
+    val nonFoldableBucket = AttributeReference("bs", DayTimeIntervalType())()
+    val expr1 = TimeBucket(nonFoldableBucket, tsLit, originLit)
+    val r1 = expr1.checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+    assert(r1.errorSubClass == "NON_FOLDABLE_INPUT")
+    assert(r1.messageParameters("inputName") == "`bucketSize`")
+
+    // Non-foldable origin
+    val nonFoldableOrigin = AttributeReference("o", TimestampType)()
+    val expr2 = TimeBucket(hour, tsLit, nonFoldableOrigin)
+    val r2 = expr2.checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+    assert(r2.errorSubClass == "NON_FOLDABLE_INPUT")
+    assert(r2.messageParameters("inputName") == "`origin`")
+
+    // Non-positive DT bucketSize
+    val expr3 = TimeBucket(Literal(Duration.ofMinutes(0)), tsLit, originLit)
+    val r3 = expr3.checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+    assert(r3.errorSubClass == "VALUE_OUT_OF_RANGE")
+
+    // Non-positive YM bucketSize
+    val expr4 = TimeBucket(Literal(Period.ofMonths(-1)), tsLit, originLit)
+    val r4 = expr4.checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+    assert(r4.errorSubClass == "VALUE_OUT_OF_RANGE")
+
+    // ts/origin type mismatch: TIMESTAMP ts vs TIMESTAMP_NTZ origin
+    val ntzOrigin = Literal(LocalDateTime.of(1970, 1, 1, 0, 0, 0))
+    val expr5 = TimeBucket(hour, tsLit, ntzOrigin)
+    val r5 = expr5.checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+    assert(r5.errorSubClass == "UNEXPECTED_INPUT_TYPE")
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruningSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruningSubquerySuite.scala
index 9d7d756019bdb..614a29c5ac4a0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruningSubquerySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruningSubquerySuite.scala
@@ -86,4 +86,29 @@ class DynamicPruningSubquerySuite extends SparkFunSuite {
       .copy(broadcastKeyIndices = Seq(1))
     assert(dynamicPruningSubquery.resolved == false)
   }
+
+  test("SPARK-56694: Canonicalized buildKeys are consistent for identical build queries with " +
+      "different ExprIds") {
+    val attr1 = AttributeReference("key", IntegerType)()
+    val attr2 = AttributeReference("key", IntegerType)()
+    assert(attr1.exprId != attr2.exprId, "precondition: fresh attributes have distinct ExprIds")
+
+    val dpq1 = DynamicPruningSubquery(
+      pruningKey = Literal(1),
+      buildQuery = LocalRelation(attr1),
+      buildKeys = Seq(attr1),
+      broadcastKeyIndices = Seq(0),
+      onlyInBroadcast = false)
+
+    val dpq2 = DynamicPruningSubquery(
+      pruningKey = Literal(1),
+      buildQuery = LocalRelation(attr2),
+      buildKeys = Seq(attr2),
+      broadcastKeyIndices = Seq(0),
+      onlyInBroadcast = false)
+
+    assert(dpq1.canonicalized == dpq2.canonicalized,
+      "DynamicPruningSubquery with identical build queries but different ExprIds " +
+      "must produce identical canonicalized forms so PlanMerger can deduplicate them")
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
index 6b642e8746368..21882414ebd36 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType._
 import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.{CalendarInterval, GeographyVal, GeometryVal, UTF8String}
+import org.apache.spark.unsafe.types.{BinaryView, CalendarInterval, UTF8String}
 
 class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
 
@@ -668,7 +668,7 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
     val geog = Geography.fromWKB(pointBytes, 4326)
     val lit = Literal.create(geog, GeographyType(4326))
     assert(lit.dataType === GeographyType(4326))
-    assert(lit.value.isInstanceOf[GeographyVal])
+    assert(lit.value.isInstanceOf[BinaryView])
   }
 
   test("Literal.create with null Geometry value") {
@@ -683,6 +683,6 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
     val geom = Geometry.fromWKB(pointBytes, 0)
     val lit = Literal.create(geom, GeometryType(0))
     assert(lit.dataType === GeometryType(0))
-    assert(lit.value.isInstanceOf[GeometryVal])
+    assert(lit.value.isInstanceOf[BinaryView])
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala
index c74a9e35833d1..5c19e69cdfa3d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala
@@ -20,7 +20,8 @@ package org.apache.spark.sql.catalyst.expressions
 import java.sql.Timestamp
 
 import org.apache.spark.{SparkFunSuite, SparkRuntimeException}
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
+import org.apache.spark.sql.catalyst.FunctionIdentifier
+import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, SimpleAnalyzer, UnresolvedAttribute}
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
 import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project}
@@ -143,6 +144,29 @@ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     assert(analyze(new Nvl(floatLit, doubleLit)).dataType == DoubleType)
   }
 
+  test("SPARK-56840: NullIf replacement preserves its data type before type coercion") {
+    Seq(true, false).foreach { alwaysInlineCommonExpr =>
+      withSQLConf(SQLConf.ALWAYS_INLINE_COMMON_EXPR.key -> alwaysInlineCommonExpr.toString) {
+        val nullIf = new NullIf(Literal(1), Literal(1))
+        assert(nullIf.dataType == IntegerType)
+        assert(nullIf.replacement.dataType == IntegerType)
+      }
+    }
+  }
+
+  test(
+    "SPARK-56840: NullIf accepts unresolved nested fields during inlined function construction") {
+    withSQLConf(SQLConf.ALWAYS_INLINE_COMMON_EXPR.key -> "true") {
+      val nullIf = FunctionRegistry.builtin.lookupFunction(
+        FunctionIdentifier("nullif"),
+        Seq(
+          UnresolvedAttribute(Seq("c", "provider")),
+          Lower(Literal("ERROR_MULTIPLE_PROVIDERS"))))
+
+      assert(nullIf.isInstanceOf[NullIf])
+    }
+  }
+
   test("AtLeastNNonNulls") {
     val mix = Seq(Literal("x"),
       Literal.create(null, StringType),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala
index b64bc49f95446..a968526a89f10 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SchemaPruningSuite.scala
@@ -143,4 +143,5 @@ class SchemaPruningSuite extends SparkFunSuite with SQLHelper {
     val prunedSchema = SchemaPruning.pruneSchema(schema, rootFields)
     assert(prunedSchema.head.metadata.getString("foo") == "bar")
   }
+
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjectionSuite.scala
index 9c0d610f35f6b..719f4a5906f77 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjectionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjectionSuite.scala
@@ -87,8 +87,7 @@ object AlwaysNull extends InternalRow {
   override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = notSupported
   override def getUTF8String(ordinal: Int): UTF8String = notSupported
   override def getBinary(ordinal: Int): Array[Byte] = notSupported
-  override def getGeography(ordinal: Int): GeographyVal = notSupported
-  override def getGeometry(ordinal: Int): GeometryVal = notSupported
+  override def getBinaryView(ordinal: Int): BinaryView = notSupported
   override def getInterval(ordinal: Int): CalendarInterval = notSupported
   override def getVariant(ordinal: Int): VariantVal = notSupported
   override def getStruct(ordinal: Int, numFields: Int): InternalRow = notSupported
@@ -119,8 +118,7 @@ object AlwaysNonNull extends InternalRow {
   override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = notSupported
   override def getUTF8String(ordinal: Int): UTF8String = UTF8String.fromString("test")
   override def getBinary(ordinal: Int): Array[Byte] = notSupported
-  override def getGeography(ordinal: Int): GeographyVal = notSupported
-  override def getGeometry(ordinal: Int): GeometryVal = notSupported
+  override def getBinaryView(ordinal: Int): BinaryView = notSupported
   override def getInterval(ordinal: Int): CalendarInterval = notSupported
   override def getVariant(ordinal: Int): VariantVal = notSupported
   override def getStruct(ordinal: Int, numFields: Int): InternalRow = notSupported
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala
index de62f8b46b7d3..5588f42d84b2b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala
@@ -56,10 +56,10 @@ class UnsafeRowWriterSuite extends SparkFunSuite {
     rowWriter.resetRowWriter()
     rowWriter.setNullAt(0)
     assert(rowWriter.getRow.isNullAt(0))
-    assert(rowWriter.getRow.getGeography(0) === null)
-    val geography = GeographyVal.fromBytes(Array[Byte](1, 2, 3))
+    assert(rowWriter.getRow.getBinaryView(0) === null)
+    val geography = BinaryView.fromBytes(Array[Byte](1, 2, 3))
     rowWriter.write(1, geography)
-    assert(rowWriter.getRow.getGeography(1).getBytes sameElements geography.getBytes)
+    assert(rowWriter.getRow.getBinaryView(1).getBytes sameElements geography.getBytes)
   }
 
   test("write and get geometry through UnsafeRowWriter") {
@@ -67,10 +67,10 @@ class UnsafeRowWriterSuite extends SparkFunSuite {
     rowWriter.resetRowWriter()
     rowWriter.setNullAt(0)
     assert(rowWriter.getRow.isNullAt(0))
-    assert(rowWriter.getRow.getGeometry(0) === null)
-    val geometry = GeometryVal.fromBytes(Array[Byte](1, 2, 3))
+    assert(rowWriter.getRow.getBinaryView(0) === null)
+    val geometry = BinaryView.fromBytes(Array[Byte](1, 2, 3))
     rowWriter.write(1, geometry)
-    assert(rowWriter.getRow.getGeometry(1).getBytes sameElements geometry.getBytes)
+    assert(rowWriter.getRow.getBinaryView(1).getBytes sameElements geometry.getBytes)
   }
 
   test("write and get calendar intervals through UnsafeRowWriter") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala
index 2aef7c455e64f..605c542ba7f4e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala
@@ -140,6 +140,52 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite {
     }
   }
 
+  test("SPARK-56654: reject unpaired UTF-16 surrogates in JSON strings") {
+    val invalidJsonInputs = Seq(
+      "\"\\uD835\"",                  // lone high surrogate (string value)
+      "\"\\uDC00\"",                  // lone low surrogate (string value)
+      "\"\\uD835x\\uDC00\"",          // surrogates separated by non-surrogate
+      "\"\\uD835\\uD835\"",           // two high surrogates in a row
+      "\"prefix \\uD835\"",           // trailing lone high surrogate
+      "{\"\\uD835\":1}",              // lone surrogate in an object key
+      "[\"ok\", \"\\uDC00\"]"         // lone surrogate inside an array element
+    )
+    for (json <- invalidJsonInputs) {
+      checkError(
+        exception = intercept[SparkThrowable] {
+          VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json),
+            allowDuplicateKeys = false)
+        },
+        condition = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
+        parameters = Map("badRecord" -> json, "failFastMode" -> "FAILFAST")
+      )
+      val tryResult = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json),
+        allowDuplicateKeys = false, failOnError = false)
+      assert(tryResult === null)
+    }
+    val validJsonInputs = Seq(
+      "\"\\uD83D\\uDE05\"",           // U+1F605 GRINNING FACE WITH SWEAT
+      "\"\\uD835\\uDC00\"",           // U+1D400 MATHEMATICAL BOLD CAPITAL A
+      "{\"\\uD83D\\uDE05\":1}",       // surrogate pair in an object key
+      "[\"\\uD835\\uDC00\"]"          // surrogate pair inside an array
+    )
+    for (json <- validJsonInputs) {
+      val parsed = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json),
+        allowDuplicateKeys = false)
+      assert(parsed != null, s"expected non-null variant for $json")
+    }
+  }
+
+  test("SPARK-56654: legacy mode accepts unpaired surrogates") {
+    val json = "\"\\uD835\""
+    val parsed = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json),
+      allowDuplicateKeys = false, validateUnicodeInJsonParsing = false)
+    assert(parsed != null)
+    val tryParsed = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json),
+      allowDuplicateKeys = false, failOnError = false, validateUnicodeInJsonParsing = false)
+    assert(tryParsed != null)
+  }
+
   test("isVariantNull") {
     def check(json: String, expected: Boolean): Unit = {
       if (json != null) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala
index ef2618e8455f2..dc42cd7fbb2eb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala
@@ -1053,4 +1053,151 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
       }
     }
   }
+
+  test("is_valid_variant") {
+    val emptyMetadata = Array[Byte](VERSION, 0, 0)
+
+    // The row cannot be converted to string because the `VariantVal` may be malformed (toString
+    // will throw an exception).
+    class NoDisplayGenericInternalRow(values: Array[Any]) extends GenericInternalRow(values) {
+      override def toString: String = "NoDisplayGenericInternalRow"
+    }
+
+    def valid(value: Array[Byte], metadata: Array[Byte] = emptyMetadata): Unit = {
+      val row = new NoDisplayGenericInternalRow(Array(new VariantVal(value, metadata)))
+      val v = BoundReference(0, VariantType, nullable = true)
+      checkEvaluation(IsValidVariant(v), true, row)
+    }
+
+    def invalid(value: Array[Byte], metadata: Array[Byte] = emptyMetadata): Unit = {
+      val row = new NoDisplayGenericInternalRow(Array(new VariantVal(value, metadata)))
+      val v = BoundReference(0, VariantType, nullable = true)
+      checkEvaluation(IsValidVariant(v), false, row)
+    }
+
+    // Valid primitives.
+    valid(Array(primitiveHeader(NULL)))
+    valid(Array(primitiveHeader(TRUE)))
+    valid(Array(primitiveHeader(FALSE)))
+    valid(Array(primitiveHeader(INT1), 1))
+    valid(Array(primitiveHeader(INT2), 1, 0))
+    valid(Array(primitiveHeader(INT4), 1, 0, 0, 0))
+    valid(Array(primitiveHeader(INT8), 1, 0, 0, 0, 0, 0, 0, 0))
+    valid(Array(primitiveHeader(DOUBLE), 0, 0, 0, 0, 0, 0, 0, 0))
+    valid(Array(primitiveHeader(DECIMAL4), 0, 1, 0, 0, 0))
+    valid(Array(primitiveHeader(FLOAT), 0, 0, 0, 0))
+    valid(Array(primitiveHeader(DATE), 0, 0, 0, 0))
+    valid(Array(primitiveHeader(TIMESTAMP), 0, 0, 0, 0, 0, 0, 0, 0))
+    valid(Array(primitiveHeader(TIMESTAMP_NTZ), 0, 0, 0, 0, 0, 0, 0, 0))
+    valid(Array(shortStrHeader(3), 'a', 'b', 'c'))
+    valid(Array(primitiveHeader(LONG_STR), 2, 0, 0, 0, 'a', 'b'))
+    valid(Array(primitiveHeader(BINARY), 2, 0, 0, 0, 1, 2))
+    valid(Array(primitiveHeader(UUID)) ++ createArray[Byte](16, 0.toByte))
+
+    // Malformed primitives: truncated content.
+    invalid(Array(primitiveHeader(INT8), 0, 0, 0, 0, 0, 0, 0))
+    invalid(Array(primitiveHeader(DECIMAL4)))
+    invalid(Array(primitiveHeader(DECIMAL8)))
+    invalid(Array(primitiveHeader(DECIMAL16)))
+    invalid(Array(primitiveHeader(DECIMAL16)) ++ createArray[Byte](16, 0.toByte))
+    invalid(Array(shortStrHeader(2), 'x'))
+    invalid(Array(primitiveHeader(LONG_STR), 0, 0, 0))
+    invalid(Array(primitiveHeader(LONG_STR), 1, 0, 0, 0))
+
+    // Valid array.
+    valid(Array(arrayHeader(false, 1),
+      /* size */ 2,
+      /* offset list */ 0, 1, 2,
+      /* element data */ primitiveHeader(TRUE), primitiveHeader(FALSE)))
+
+    // Valid empty array.
+    valid(Array(arrayHeader(false, 1),
+      /* size */ 0,
+      /* offset list */ 0))
+
+    // Malformed array: size is 1 but no content.
+    invalid(Array(arrayHeader(false, 1),
+      /* size */ 1,
+      /* offset list */ 0))
+
+    // Malformed array: requires 4-byte size but only one byte given.
+    invalid(Array(arrayHeader(true, 1),
+      /* size */ 0,
+      /* offset list */ 0))
+
+    // Malformed array: offset out of bound.
+    invalid(Array(arrayHeader(false, 1),
+      /* size */ 1,
+      /* offset list */ 1, 1))
+
+    // Malformed array: nested element is malformed.
+    invalid(Array(arrayHeader(false, 1),
+      /* size */ 1,
+      /* offset list */ 0, 2,
+      /* element data: INT8 with only 1 byte */ primitiveHeader(INT8), 0))
+
+    // Valid object.
+    val metadata = Array[Byte](VERSION, 2, 0, 1, 2) ++ Array[Byte]('a', 'b')
+    valid(Array(objectHeader(false, 1, 1),
+      /* size */ 2,
+      /* id list */ 0, 1,
+      /* offset list */ 0, 2, 4,
+      /* field data */ primitiveHeader(INT1), 1, primitiveHeader(INT1), 2), metadata)
+
+    // Valid empty object.
+    valid(Array(objectHeader(false, 1, 1),
+      /* size */ 0,
+      /* offset list */ 0))
+
+    // Malformed object: id out of bound.
+    invalid(Array(objectHeader(false, 1, 1),
+      /* size */ 1,
+      /* id list */ 0,
+      /* offset list */ 0, 2,
+      /* field data */ primitiveHeader(INT1), 1))
+
+    // Malformed object: offset out of bound.
+    invalid(Array(objectHeader(false, 1, 1),
+      /* size */ 1,
+      /* id list */ 0,
+      /* offset list */ 5, 0,
+      /* field data */ primitiveHeader(INT1), 1), metadata)
+
+    // Malformed object: nested value is malformed.
+    invalid(Array(objectHeader(false, 1, 1),
+      /* size */ 1,
+      /* id list */ 0,
+      /* offset list */ 0, 2,
+      /* field data: INT8 with only 1 byte */ primitiveHeader(INT8), 0), metadata)
+
+    // Unknown primitive type (type info 17 is not defined).
+    invalid(Array(primitiveHeader(17)))
+
+    // Malformed metadata: version is not 1.
+    invalid(Array(primitiveHeader(INT1), 0), Array[Byte](3, 0, 0))
+    invalid(Array(primitiveHeader(INT1), 0), Array[Byte](2, 0, 0))
+
+    // Malformed metadata: offset > nextOffset for key id 0.
+    invalid(Array(objectHeader(false, 1, 1),
+      /* size */ 1,
+      /* id list */ 0,
+      /* offset list */ 0, 2,
+      /* field data */ primitiveHeader(INT1), 1),
+      Array[Byte](VERSION, 1, 2, 1) ++ Array[Byte]('a', 'b'))
+
+    // Malformed metadata: truncated offset list (declares dict size 1 but is missing nextOffset).
+    invalid(Array(objectHeader(false, 1, 1),
+      /* size */ 1,
+      /* id list */ 0,
+      /* offset list */ 0, 2,
+      /* field data */ primitiveHeader(INT1), 1),
+      Array[Byte](VERSION, 1, 0))
+
+    // Valid metadata formats: extra bits are ignored.
+    valid(Array(primitiveHeader(TRUE)), Array[Byte](VERSION | 1 << 4, 0, 0))
+    valid(Array(primitiveHeader(TRUE)), Array[Byte](VERSION | 1 << 5, 0, 0))
+
+    // Null input.
+    checkEvaluation(IsValidVariant(Literal.create(null, VariantType)), null)
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala
index 61fb68cfba863..bac20c6ed3533 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala
@@ -18,8 +18,8 @@
 package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.dsl.expressions._
-import org.apache.spark.sql.catalyst.expressions.AttributeMap
-import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest}
+import org.apache.spark.sql.catalyst.expressions.{AttributeMap, EqualTo, IsNull, Or}
+import org.apache.spark.sql.catalyst.plans.{Inner, LeftAnti, PlanTest}
 import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, Join, JoinHint, NO_BROADCAST_HASH, SHUFFLE_HASH}
 import org.apache.spark.sql.catalyst.statsEstimation.StatsTestPlan
 import org.apache.spark.sql.internal.SQLConf
@@ -156,4 +156,19 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper {
     }
   }
 
+  test("canPlanAsBroadcastHashJoin should respect size for single-column null-aware anti join") {
+    val leftKey = left.output.head
+    val rightKey = right.output.head
+    val condition = Or(EqualTo(leftKey, rightKey), IsNull(EqualTo(leftKey, rightKey)))
+    val nullAwareAntiJoin = Join(left, right, LeftAnti, Some(condition), JoinHint.NONE)
+    val largeRight = right.copy(rowCount = 20000000, size = Some(20000000))
+
+    withSQLConf(
+      SQLConf.OPTIMIZE_NULL_AWARE_ANTI_JOIN.key -> "true",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") {
+      assert(canPlanAsBroadcastHashJoin(nullAwareAntiJoin, SQLConf.get))
+      assert(!canPlanAsBroadcastHashJoin(nullAwareAntiJoin.copy(right = largeRight), SQLConf.get))
+    }
+  }
+
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala
index e685c756a4b73..dfa17f926c5a7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala
@@ -37,6 +37,7 @@ class MergeSubplansSuite extends PlanTest {
   }
 
   val testRelation = LocalRelation($"a".int, $"b".int, $"c".string)
+  val testRelation2 = LocalRelation($"d".int, $"e".int)
   val testRelationWithNonBinaryCollation = LocalRelation(
     $"utf8_binary".string("UTF8_BINARY"),
     $"utf8_lcase".string("UTF8_LCASE"))
@@ -1515,6 +1516,203 @@ class MergeSubplansSuite extends PlanTest {
     }
   }
 
+  test("SPARK-56570: `(np: Filter, cp)` does not duplicate a cpFilter already present in " +
+      "mergedChild") {
+    // The `(np: Filter, cp)` create-new branch is only reached with a non-None recursion
+    // `cpFilter` when cp has a shape that lets filter propagation bubble a cpFilter up through
+    // the recursion without being consumed by an `Aggregate`. A Join with a Filter on one side
+    // does this:
+    //   - sq1 (cp): Aggregate -> Join(testRelation, Filter(e < 5, testRelation2), a = d).
+    //   - sq2 (np): Aggregate -> Filter(a > 1) -> Join(testRelation, testRelation2, a = d).
+    // At `(Agg, Agg)` children, the pair is `(Filter, Join)`. `(Filter, cp)` fires, peels np's
+    // Filter and recurses `(Join, Join)`. The right-child recursion hits `(np, cp: Filter)`,
+    // creates `propagatedFilter_0` for `e < 5`, and `(Join, Join)` propagates that as cpFilter
+    // all the way back to the outer `(Filter, cp)` case. `mergedChild` at that point is a Join
+    // whose output already contains the `propagatedFilter_0` attribute.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2.where($"e" < 5), Inner, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.join(testRelation2, Inner, Some($"a" === $"d")).where($"a" > 1)
+        .groupBy()(sum($"a").as("sum_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    val f0Alias = Alias($"e" < 5, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val f1Alias = Alias($"a" > 1, "propagatedFilter_1")()
+    val f1 = f1Alias.toAttribute
+    val innerProject = testRelation2.select(testRelation2.output ++ Seq(f0Alias): _*)
+    val joinNode = testRelation.join(innerProject, Inner, Some($"a" === $"d"))
+    val mergedSubquery = joinNode
+      .select(joinNode.output ++ Seq(f1Alias): _*)
+      .groupBy()(
+        max($"a", Some(f0)).as("max_a"),
+        sum($"a", Some(f1)).as("sum_a"))
+      .select(CreateNamedStruct(Seq(
+        Literal("max_a"), $"max_a",
+        Literal("sum_a"), $"sum_a"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(
+        SQLConf.MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED.key -> "true",
+        SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56570: `(np, cp: Filter)` does not duplicate an npFilter already present in " +
+      "mergedChild") {
+    // Mirror of the previous test: the `(np, cp: Filter)` create-new branch is only reached with
+    // a non-None recursion `npFilter` when np has a shape that lets filter propagation bubble up
+    // through the recursion. A Join with a Filter on one side does this:
+    //   - sq1 (cp): Aggregate -> Filter(a > 1) -> Join(testRelation, testRelation2, a = d).
+    //   - sq2 (np): Aggregate -> Join(testRelation, Filter(e < 5, testRelation2), a = d).
+    // At `(Agg, Agg)` children, the pair is `(Join, Filter)`. `(np, cp: Filter)` fires, peels
+    // cp's Filter and recurses `(Join, Join)`. The right-child recursion hits `(np: Filter, cp)`,
+    // creates `propagatedFilter_0` for `e < 5`, and `(Join, Join)` propagates that as npFilter
+    // all the way back to the outer `(np, cp: Filter)` case. `mergedChild` at that point is a
+    // Join whose output already contains the `propagatedFilter_0` attribute.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, Inner, Some($"a" === $"d")).where($"a" > 1)
+        .groupBy()(max($"a").as("max_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.join(testRelation2.where($"e" < 5), Inner, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    val f0Alias = Alias($"e" < 5, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val f1Alias = Alias($"a" > 1, "propagatedFilter_1")()
+    val f1 = f1Alias.toAttribute
+    val innerProject = testRelation2.select(testRelation2.output ++ Seq(f0Alias): _*)
+    val joinNode = testRelation.join(innerProject, Inner, Some($"a" === $"d"))
+    val mergedSubquery = joinNode
+      .select(joinNode.output ++ Seq(f1Alias): _*)
+      .groupBy()(
+        max($"a", Some(f1)).as("max_a"),
+        sum($"a", Some(f0)).as("sum_a"))
+      .select(CreateNamedStruct(Seq(
+        Literal("max_a"), $"max_a",
+        Literal("sum_a"), $"sum_a"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(
+        SQLConf.MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED.key -> "true",
+        SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56570: tagged `(Filter, Filter)` reuse must keep mergedChild's appended columns") {
+    // Round 1-2 build a tagged Filter (condition `OR(pf_0=(b<5), pf_1=(a>1))`) over a tagged
+    // Project carrying both `propagatedFilter_*` aliases.
+    // Round 3 merges a subplan whose Filter sits *above* a user Project introducing
+    // `d = a + b`:
+    //   sq3 = Aggregate(sum(d)) -> Filter(a>1) -> Project([d=(a+b), a, b, c]) -> testRelation.
+    // At `(Agg, Agg)` children the pair is `(Filter(a>1) -> Project, Filter[tagged] -> Project)`
+    // -- neither side is a Project at this level, so `(Filter, Filter)` tagged fires directly
+    // and recurses on the children `(Project[d,a,b,c], Project[tagged][a,b,c,pf_0,pf_1])`. That
+    // recursion's `(Project, Project)` case builds `Project([a,b,c,pf_0_alias,pf_1_alias,d], t)`
+    // -- `mergedChild` now carries a column (`d`) that `cp.child` doesn't. The reuse check
+    // finds `pf_1` already matches sq3's `(a > 1)`, so the tagged-reuse branch fires and must
+    // rebuild the Filter over `mergedChild` so that `d` stays visible to the enclosing
+    // Aggregate's `sum(d)`.
+    val subquery1 = ScalarSubquery(testRelation.where($"a" > 1).groupBy()(max($"a").as("max_a")))
+    val subquery2 = ScalarSubquery(testRelation.where($"b" < 5).groupBy()(min($"b").as("min_b")))
+    val subquery3 = ScalarSubquery(
+      testRelation
+        .select(($"a" + $"b").as("d"), $"a", $"b", $"c")
+        .where($"a" > 1)
+        .groupBy()(sum($"d").as("sum_d")))
+    val originalQuery = testRelation.select(subquery1, subquery2, subquery3)
+
+    val f0Alias = Alias($"b" < 5, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val f1Alias = Alias($"a" > 1, "propagatedFilter_1")()
+    val f1 = f1Alias.toAttribute
+    val dAlias = Alias($"a" + $"b", "d")()
+    val d = dAlias.toAttribute
+    val innerProject = testRelation.select(testRelation.output ++ Seq(f0Alias, f1Alias, dAlias): _*)
+    val mergedSubquery = innerProject
+      .where(Or(f0, f1))
+      .groupBy()(
+        max($"a", Some(f1)).as("max_a"),
+        min($"b", Some(f0)).as("min_b"),
+        sum(d, Some(f1)).as("sum_d"))
+      .select(CreateNamedStruct(Seq(
+        Literal("max_a"), $"max_a",
+        Literal("min_b"), $"min_b",
+        Literal("sum_d"), $"sum_d"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1),
+        extractorExpression(0, analyzedMergedSubquery.output, 2)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56570: `(np, cp: Filter)` drops a tagged cp Filter without synthesising a " +
+      "redundant alias") {
+    // Round 1+2: sq1 and sq2 merge via `(Filter, Filter)` first-time, creating a tagged Filter
+    // (condition `OR(pf_0=(b<5), pf_1=(a>1))`) over a tagged Project carrying both aliases.
+    // cp's aggregates are `[max(a) FILTER pf_1, min(b) FILTER pf_0]`.
+    // Round 3: sq3 has no Filter, so `(np, cp: Filter)` with cp tagged fires. Synthesising a new
+    // `propagatedFilter_2 = OR(pf_0, pf_1)` would leave the enclosing Aggregate wrapping cp's
+    // already-filtered aggregates with `FILTER AND(OR(pf_0, pf_1), pf_i)` (which simplifies to
+    // `FILTER pf_i`) -- wasted work and plan bloat. Dropping cp's Filter returns the recursion's
+    // Project unchanged, leaves cp's per-side FILTER clauses untouched, and leaves the base
+    // unrestricted for np's unfiltered aggregate.
+    val subquery1 = ScalarSubquery(testRelation.where($"a" > 1).groupBy()(max($"a").as("max_a")))
+    val subquery2 = ScalarSubquery(testRelation.where($"b" < 5).groupBy()(min($"b").as("min_b")))
+    val subquery3 = ScalarSubquery(testRelation.groupBy()(sum($"a").as("sum_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2, subquery3)
+
+    val f0Alias = Alias($"b" < 5, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val f1Alias = Alias($"a" > 1, "propagatedFilter_1")()
+    val f1 = f1Alias.toAttribute
+    val mergedSubquery = testRelation
+      .select(testRelation.output ++ Seq(f0Alias, f1Alias): _*)
+      .groupBy()(
+        max($"a", Some(f1)).as("max_a"),
+        min($"b", Some(f0)).as("min_b"),
+        sum($"a").as("sum_a"))
+      .select(CreateNamedStruct(Seq(
+        Literal("max_a"), $"max_a",
+        Literal("min_b"), $"min_b",
+        Literal("sum_a"), $"sum_a"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1),
+        extractorExpression(0, analyzedMergedSubquery.output, 2)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_SYMMETRIC_FILTER_PROPAGATION_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
   test("SPARK-40193: Merge non-grouping subqueries where one aggregate already carries a " +
       "FILTER clause") {
     val subquery1 = ScalarSubquery(testRelation.groupBy()(max($"a").as("max_a")))
@@ -1542,4 +1740,230 @@ class MergeSubplansSuite extends PlanTest {
 
     comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
   }
+
+  test("SPARK-56677: Merge non-grouping subqueries with filter on left join child") {
+    // cp (subquery1): Aggregate([], [sum(a)], Join(testRelation, testRelation2, a=d))
+    // np (subquery2): Aggregate([], [max(a)], Join(Filter(a>1, testRelation), testRelation2, a=d))
+    // The filter on the left join child propagates as a boolean attribute through the Join node
+    // and is consumed as a FILTER (WHERE ...) clause on the np-side aggregate expression.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, Inner, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.where($"a" > 1).join(testRelation2, Inner, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    val f0Alias = Alias($"a" > 1, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val mergedSubquery = testRelation
+      .select(testRelation.output ++ Seq(f0Alias): _*)
+      .join(testRelation2, Inner, Some($"a" === $"d"))
+      .groupBy()(
+        sum($"a").as("sum_a"),
+        max($"a", Some(f0)).as("max_a"))
+      .select(CreateNamedStruct(Seq(
+        Literal("sum_a"), $"sum_a",
+        Literal("max_a"), $"max_a"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56677: Merge non-grouping subqueries with filter on right join child") {
+    // cp (subquery1): Aggregate([], [sum(a)], Join(testRelation, testRelation2, a=d))
+    // np (subquery2): Aggregate([], [max(d)], Join(testRelation, Filter(d>1, testRelation2), a=d))
+    // The filter on the right join child propagates analogously to the left-child case.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, Inner, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.join(testRelation2.where($"d" > 1), Inner, Some($"a" === $"d"))
+        .groupBy()(max($"d").as("max_d")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    val f0Alias = Alias($"d" > 1, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val mergedSubquery = testRelation
+      .join(
+        testRelation2.select(testRelation2.output ++ Seq(f0Alias): _*),
+        Inner, Some($"a" === $"d"))
+      .groupBy()(
+        sum($"a").as("sum_a"),
+        max($"d", Some(f0)).as("max_d"))
+      .select(CreateNamedStruct(Seq(
+        Literal("sum_a"), $"sum_a",
+        Literal("max_d"), $"max_d"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56677: Merge non-grouping subqueries with filter on left child of a Cross join") {
+    // Cross join never NULL-pads either side, so filter propagation is safe from both sides.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, Cross, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.where($"a" > 1).join(testRelation2, Cross, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    val f0Alias = Alias($"a" > 1, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val mergedSubquery = testRelation
+      .select(testRelation.output ++ Seq(f0Alias): _*)
+      .join(testRelation2, Cross, Some($"a" === $"d"))
+      .groupBy()(
+        sum($"a").as("sum_a"),
+        max($"a", Some(f0)).as("max_a"))
+      .select(CreateNamedStruct(Seq(
+        Literal("sum_a"), $"sum_a",
+        Literal("max_a"), $"max_a"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56677: Do not merge subqueries when both join children have independent filters") {
+    // np has filters on BOTH left and right join children simultaneously. The guard in the
+    // Join case prevents this merge because combining two independent filter attributes would
+    // require ANDing them into a new alias, which is not yet supported.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, Inner, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.where($"a" > 1).join(testRelation2.where($"d" > 1), Inner, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze)
+    }
+  }
+
+  test("SPARK-56677: Merge non-grouping subqueries with filter on left side of LeftSemi join") {
+    // Left-side filter attributes ARE in the LeftSemi join output, so propagation is safe.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, LeftSemi, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.where($"a" > 1).join(testRelation2, LeftSemi, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    val f0Alias = Alias($"a" > 1, "propagatedFilter_0")()
+    val f0 = f0Alias.toAttribute
+    val mergedSubquery = testRelation
+      .select(testRelation.output ++ Seq(f0Alias): _*)
+      .join(testRelation2, LeftSemi, Some($"a" === $"d"))
+      .groupBy()(
+        sum($"a").as("sum_a"),
+        max($"a", Some(f0)).as("max_a"))
+      .select(CreateNamedStruct(Seq(
+        Literal("sum_a"), $"sum_a",
+        Literal("max_a"), $"max_a"
+      )).as("mergedValue"))
+    val analyzedMergedSubquery = mergedSubquery.analyze
+    val correctAnswer = WithCTE(
+      testRelation.select(
+        extractorExpression(0, analyzedMergedSubquery.output, 0),
+        extractorExpression(0, analyzedMergedSubquery.output, 1)),
+      Seq(definitionNode(analyzedMergedSubquery, 0)))
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze)
+    }
+  }
+
+  test("SPARK-56677: Do not merge subqueries when filter is on the right side of a LeftSemi join") {
+    // Right-side filter attributes are NOT in the LeftSemi join output (only left-side columns
+    // are produced). Propagating such a filter would create an unresolvable attribute reference
+    // in the parent Aggregate's FILTER clause.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, LeftSemi, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.join(testRelation2.where($"d" > 1), LeftSemi, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze)
+    }
+  }
+
+  test("SPARK-56677: Do not merge subqueries when filter is on the nullable side of an outer " +
+      "join") {
+    // For a RightOuter join the left side is nullable: unmatched right rows produce NULL for all
+    // left-side columns including the filter attribute f, so FILTER (WHERE f=NULL) would
+    // incorrectly exclude those rows from the aggregate even though they appear in the join result.
+    // The same problem applies to the right side of a LeftOuter join and both sides of FullOuter.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, RightOuter, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.where($"a" > 1).join(testRelation2, RightOuter, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze)
+    }
+  }
+
+  test("SPARK-56677: Do not merge subqueries when filter is on either side of a FullOuter join") {
+    // For a FullOuter join both sides are nullable: unmatched rows from either side produce NULL
+    // for the other side's columns. A filter attribute from either side would be NULL for those
+    // unmatched rows, making propagation unsafe from both sides.
+    val subquery1 = ScalarSubquery(
+      testRelation.join(testRelation2, FullOuter, Some($"a" === $"d"))
+        .groupBy()(sum($"a").as("sum_a")))
+    val subquery2 = ScalarSubquery(
+      testRelation.where($"a" > 1).join(testRelation2, FullOuter, Some($"a" === $"d"))
+        .groupBy()(max($"a").as("max_a")))
+    val originalQuery = testRelation.select(subquery1, subquery2)
+
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "true") {
+      comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze)
+    }
+  }
+
+  test("SPARK-56677: Do not merge subqueries with filter propagation through join when disabled") {
+    withSQLConf(SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key -> "false") {
+      val subquery1 = ScalarSubquery(
+        testRelation.join(testRelation2, Inner, Some($"a" === $"d"))
+          .groupBy()(sum($"a").as("sum_a")))
+      val subquery2 = ScalarSubquery(
+        testRelation.where($"a" > 1).join(testRelation2, Inner, Some($"a" === $"d"))
+          .groupBy()(max($"a").as("max_a")))
+      val originalQuery = testRelation.select(subquery1, subquery2)
+
+      comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze)
+    }
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingPointNumbersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingPointNumbersSuite.scala
index 21049ca3546dc..a0a9c8ec32243 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingPointNumbersSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingPointNumbersSuite.scala
@@ -19,10 +19,11 @@ package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{CaseWhen, If, IsNull, KnownFloatingPointNormalized}
+import org.apache.spark.sql.catalyst.expressions.{ArrayDistinct, ArrayExcept, ArrayIntersect, ArraysOverlap, ArrayTransform, ArrayUnion, CaseWhen, Expression, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, NamedLambdaVariable}
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.types.DoubleType
 
 class NormalizeFloatingPointNumbersSuite extends PlanTest {
 
@@ -34,6 +35,18 @@ class NormalizeFloatingPointNumbersSuite extends PlanTest {
   val a = testRelation1.output(0)
   val testRelation2 = LocalRelation($"a".double)
   val b = testRelation2.output(0)
+  val arrayRelation = LocalRelation($"arr1".array(DoubleType), $"arr2".array(DoubleType))
+  val arr1 = arrayRelation.output(0)
+  val arr2 = arrayRelation.output(1)
+
+  private def normalizedArray(e: Expression): KnownFloatingPointNormalized = {
+    val lv = NamedLambdaVariable("arg", DoubleType, nullable = true)
+    KnownFloatingPointNormalized(
+      ArrayTransform(e,
+        LambdaFunction(
+          KnownFloatingPointNormalized(NormalizeNaNAndZero(lv)),
+          Seq(lv))))
+  }
 
   test("normalize floating points in window function expressions") {
     val query = testRelation1.window(Seq(sum(a).as("sum")), Seq(a), Seq(a.asc))
@@ -132,5 +145,108 @@ class NormalizeFloatingPointNumbersSuite extends PlanTest {
     val normalizedExpr = NormalizeFloatingNumbers.normalize(nestedExpr)
     assert(nestedExpr.dataType == normalizedExpr.dataType)
   }
+
+  test("SPARK-54918: normalize floating points in array_distinct") {
+    val query = arrayRelation.select(ArrayDistinct(arr1).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val correctAnswer = arrayRelation.select(ArrayDistinct(normalizedArray(arr1)).as("result"))
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_distinct - idempotence") {
+    val query = arrayRelation.select(ArrayDistinct(arr1).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val doubleOptimized = Optimize.execute(optimized)
+    val correctAnswer = arrayRelation.select(ArrayDistinct(normalizedArray(arr1)).as("result"))
+
+    comparePlans(doubleOptimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_union") {
+    val query = arrayRelation.select(ArrayUnion(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val correctAnswer = arrayRelation.select(
+      ArrayUnion(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_union - idempotence") {
+    val query = arrayRelation.select(ArrayUnion(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val doubleOptimized = Optimize.execute(optimized)
+    val correctAnswer = arrayRelation.select(
+      ArrayUnion(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(doubleOptimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_intersect") {
+    val query = arrayRelation.select(ArrayIntersect(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val correctAnswer = arrayRelation.select(
+      ArrayIntersect(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_intersect - idempotence") {
+    val query = arrayRelation.select(ArrayIntersect(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val doubleOptimized = Optimize.execute(optimized)
+    val correctAnswer = arrayRelation.select(
+      ArrayIntersect(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(doubleOptimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_except") {
+    val query = arrayRelation.select(ArrayExcept(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val correctAnswer = arrayRelation.select(
+      ArrayExcept(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in array_except - idempotence") {
+    val query = arrayRelation.select(ArrayExcept(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val doubleOptimized = Optimize.execute(optimized)
+    val correctAnswer = arrayRelation.select(
+      ArrayExcept(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(doubleOptimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in arrays_overlap") {
+    val query = arrayRelation.select(ArraysOverlap(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val correctAnswer = arrayRelation.select(
+      ArraysOverlap(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("SPARK-54918: normalize floating points in arrays_overlap - idempotence") {
+    val query = arrayRelation.select(ArraysOverlap(arr1, arr2).as("result"))
+
+    val optimized = Optimize.execute(query)
+    val doubleOptimized = Optimize.execute(optimized)
+    val correctAnswer = arrayRelation.select(
+      ArraysOverlap(normalizedArray(arr1), normalizedArray(arr2)).as("result"))
+
+    comparePlans(doubleOptimized, correctAnswer)
+  }
 }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala
index 09eb1f5864214..56ee9a36c7788 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LocalRelation, LogicalPlan, OneRowRelation, Project}
 import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.connector.catalog.CatalogManager
+import org.apache.spark.sql.connector.catalog.DefaultCatalogManager
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{BooleanType, StringType, StructType}
 
@@ -57,7 +57,7 @@ class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {
   }
 
   object Optimize extends Optimizer(
-    new CatalogManager(
+    new DefaultCatalogManager(
       FakeV2SessionCatalog,
       new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry))) {
     val newBatch = Batch("OptimizeRuleBreakSI", Once, OptimizeRuleBreakSI)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala
index 70a2ae94109fc..57b9df6512b5a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala
@@ -21,13 +21,13 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{Add, Alias, ArrayCompact, AttributeReference, CreateArray, CreateStruct, IntegerLiteral, Literal, MapFromEntries, Multiply, NamedExpression, Remainder}
+import org.apache.spark.sql.catalyst.expressions.{Add, Alias, ArrayCompact, AttributeReference, CreateArray, CreateStruct, IntegerLiteral, Literal, MapFromEntries, Multiply, NamedExpression, NullIf, Remainder, RuntimeReplaceable}
 import org.apache.spark.sql.catalyst.expressions.aggregate.Sum
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalRelation, LogicalPlan, OneRowRelation, Project}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, StructField, StructType}
+import org.apache.spark.sql.types.{ArrayType, BooleanType, IntegerType, MapType, StructField, StructType}
 
 /**
  * A dummy optimizer rule for testing that decrements integer literals until 0.
@@ -334,4 +334,24 @@ class OptimizerSuite extends PlanTest {
     assert(optimized2.schema ===
       StructType(StructField("map", MapType(IntegerType, IntegerType, false), false) :: Nil))
   }
+
+  test("SPARK-56840: NullIf typed null branch is replaced with a null literal") {
+    val optimizer = new SimpleTestOptimizer() {
+      override def defaultBatches: Seq[Batch] =
+        Batch("test", fixedPoint,
+          ReplaceExpressions) :: Nil
+    }
+
+    withSQLConf(SQLConf.ALWAYS_INLINE_COMMON_EXPR.key -> "true") {
+      val nullIf = new NullIf(Literal(true), Literal(true))
+      val plan = Project(Alias(nullIf, "out")() :: Nil, OneRowRelation()).analyze
+      val optimized = optimizer.execute(plan)
+
+      assert(optimized.expressions.exists(_.exists {
+        case Literal(null, BooleanType) => true
+        case _ => false
+      }))
+      assert(optimized.expressions.forall(!_.exists(_.isInstanceOf[RuntimeReplaceable])))
+    }
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteNearestByJoinSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteNearestByJoinSuite.scala
new file mode 100644
index 0000000000000..729b58394d4bc
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteNearestByJoinSuite.scala
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, CreateStruct, Inline, Literal, Rand, Uuid}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, First, MaxMinByK}
+import org.apache.spark.sql.catalyst.plans.{Inner, JoinType, LeftOuter, NearestByDistance, NearestBySimilarity, PlanTest}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Generate, Join, JoinHint, LocalRelation, NearestByJoin, Project}
+import org.apache.spark.sql.types.IntegerType
+
+class RewriteNearestByJoinSuite extends PlanTest {
+
+  // The rewrite synthesizes `Uuid(Some(<random>))` for `__qid`, whose seed is fresh per call;
+  // expected plans below use `Uuid(Some(0L))`, and we normalize the actual plan's `Uuid`
+  // seeds to 0L before `comparePlans` so the structural shape is the only thing being
+  // compared, not the (necessarily different) random seed values.
+  private def normalizeUuidSeed(plan: org.apache.spark.sql.catalyst.plans.logical.LogicalPlan)
+      : org.apache.spark.sql.catalyst.plans.logical.LogicalPlan =
+    plan.transformAllExpressions { case _: Uuid => Uuid(Some(0L)) }
+
+  private def expectedRewrite(
+      left: LocalRelation,
+      right: LocalRelation,
+      numResults: Int,
+      ranking: org.apache.spark.sql.catalyst.expressions.Expression,
+      reverse: Boolean,
+      joinType: JoinType) = {
+    val qidAlias = Alias(Uuid(Some(0L)), "__qid")()
+    val taggedLeft = Project(left.output :+ qidAlias, left)
+    val join = Join(taggedLeft, right, joinType, None, JoinHint.NONE)
+
+    val rightStruct = CreateStruct(right.output)
+    val topKAgg = MaxMinByK(
+      rightStruct, ranking, Literal(numResults), reverse = reverse)
+      .toAggregateExpression()
+    val matchesAlias = Alias(topKAgg, "__nearest_matches__")()
+    val firstLeftAggs = left.output.map { attr =>
+      Alias(
+        First(attr, ignoreNulls = false).toAggregateExpression(),
+        attr.name)(exprId = attr.exprId, qualifier = attr.qualifier)
+    }
+    val aggregate = Aggregate(
+      Seq(qidAlias.toAttribute), firstLeftAggs :+ matchesAlias, join)
+
+    val generatorOutput = right.output.map { a =>
+      AttributeReference(a.name, a.dataType, nullable = true)(
+        exprId = a.exprId, qualifier = a.qualifier)
+    }
+    val generate = Generate(
+      Inline(matchesAlias.toAttribute),
+      unrequiredChildIndex = Seq(aggregate.output.indexOf(matchesAlias.toAttribute)),
+      outer = joinType == LeftOuter,
+      qualifier = None,
+      generatorOutput = generatorOutput,
+      child = aggregate)
+    // Mirror the rewrite's final Project that constrains the output schema to
+    // `NearestByJoin.output` (left and right widened to nullable).
+    val expectedOutput =
+      left.output.map(_.withNullability(true)) ++ right.output.map(_.withNullability(true))
+    Project(expectedOutput, generate)
+  }
+
+  test("similarity, inner, k=5") {
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 5,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, 5,
+      ranking = left.output(0) + right.output(0),
+      reverse = false, joinType = Inner)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("distance, inner, k=3") {
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 3,
+      rankingExpression = left.output(0) - right.output(0),
+      direction = NearestByDistance)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, 3,
+      ranking = left.output(0) - right.output(0),
+      reverse = true, joinType = Inner)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("similarity, left outer, k=1") {
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, LeftOuter, approx = true, numResults = 1,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, 1,
+      ranking = left.output(0) + right.output(0),
+      reverse = false, joinType = LeftOuter)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("distance, left outer, k=2") {
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, LeftOuter, approx = true, numResults = 2,
+      rankingExpression = left.output(0) - right.output(0),
+      direction = NearestByDistance)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, 2,
+      ranking = left.output(0) - right.output(0),
+      reverse = true, joinType = LeftOuter)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("synthetic Join uses the user's joinType") {
+    // Locks in that the rewrite's synthetic Join carries the user's `joinType`
+    // (Inner or LeftOuter).
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    Seq(Inner, LeftOuter).foreach { joinType =>
+      val query = NearestByJoin(
+        left, right, joinType, approx = true, numResults = 1,
+        rankingExpression = left.output(0) + right.output(0),
+        direction = NearestBySimilarity)
+
+      val rewritten = RewriteNearestByJoin(query.analyze)
+      val syntheticJoin = rewritten.collect { case j: Join => j }
+      assert(syntheticJoin.size == 1,
+        s"expected exactly one synthetic Join in the rewritten plan, got ${syntheticJoin.size}")
+      assert(syntheticJoin.head.joinType == joinType,
+        s"expected synthetic Join to use $joinType, got ${syntheticJoin.head.joinType}")
+
+      val generate = rewritten.collect { case g: Generate => g }
+      assert(generate.size == 1,
+        s"expected exactly one Generate in the rewritten plan, got ${generate.size}")
+      val expectedOuter = joinType == LeftOuter
+      assert(generate.head.outer == expectedOuter,
+        s"expected Generate.outer == $expectedOuter for $joinType, got ${generate.head.outer}")
+    }
+  }
+
+  test("EXACT (approx = false) produces the same rewrite as APPROX") {
+    // Locks in the current invariant that APPROX and EXACT lower through the same
+    // brute-force rewrite. If a future change diverges them (e.g. an APPROX-only
+    // indexed-ANN strategy lands), this test fails and forces an intentional update.
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, Inner, approx = false, numResults = 5,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, 5,
+      ranking = left.output(0) + right.output(0),
+      reverse = false, joinType = Inner)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("k = 1 (lower boundary)") {
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 1,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, 1,
+      ranking = left.output(0) + right.output(0),
+      reverse = false, joinType = Inner)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("k = NearestByJoin.MaxNumResults (upper boundary)") {
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = NearestByJoin.MaxNumResults,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val expected = expectedRewrite(
+      left, right, NearestByJoin.MaxNumResults,
+      ranking = left.output(0) + right.output(0),
+      reverse = false, joinType = Inner)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("self-join: rewrite resolves duplicate ExprIds via DeduplicateRelations") {
+    // Exercises the NearestByJoin arm in DeduplicateRelations. Without it, `.analyze` on
+    // a self-join would leave the right side sharing ExprIds with the left and the
+    // CheckAnalysis arm would throw an internal error.
+    val t = LocalRelation($"a".int, $"b".int)
+    val query = NearestByJoin(
+      t, t, Inner, approx = true, numResults = 1,
+      rankingExpression = t.output(0) + t.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val tDup = LocalRelation($"a".int, $"b".int)
+    val expected = expectedRewrite(
+      t, tDup, 1,
+      ranking = t.output(0) + tDup.output(0),
+      reverse = false, joinType = Inner)
+
+    comparePlans(normalizeUuidSeed(rewritten), expected, checkAnalysis = false)
+  }
+
+  test("APPROX with nondeterministic ranking pre-materializes via Project") {
+    // Locks in the Project-injection shape: when the ranking expression is nondeterministic
+    // (legal only under APPROX), the rewrite inserts a Project above the Join that aliases
+    // the ranking value as `__ranking__`. MaxMinByK then sees a plain AttributeReference as
+    // its ordering input. This relies on Projection's standard partition-aware initialization
+    // to call `Rand.initialize` once per partition before any value is evaluated; otherwise
+    // MaxMinByK would call `eval` on an uninitialized Rand and throw at runtime. If a future
+    // optimizer change folds this Project away, this test fails and forces an intentional
+    // update.
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val ranking = Rand(Literal(0L)) + right.output(0)
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 1,
+      rankingExpression = ranking,
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+
+    val agg = rewritten.collect { case a: Aggregate => a }.head
+    assert(agg.child.isInstanceOf[Project],
+      s"expected materializing Project above the Join when ranking is nondeterministic, " +
+        s"got ${agg.child.getClass.getSimpleName}")
+    val maxMinByK = agg.aggregateExpressions.collectFirst {
+      case Alias(AggregateExpression(m: MaxMinByK, _, _, _, _), "__nearest_matches__") => m
+    }.getOrElse(fail("expected MaxMinByK aggregate in the rewritten plan"))
+    assert(maxMinByK.orderingExpr.isInstanceOf[AttributeReference],
+      "ranking expression should be materialized as an attribute, not evaluated inside MaxMinByK")
+    assert(maxMinByK.orderingExpr.asInstanceOf[AttributeReference].name == "__ranking__")
+    assert(rewritten.exists(_.expressions.exists(_.exists(_.isInstanceOf[Rand]))),
+      "Rand should still appear in the plan -- inside the materializing Project, not lost")
+  }
+
+  test("APPROX with deterministic ranking does NOT inject the materializing Project") {
+    // Counterpart to the test above: confirms the Project-injection is gated on
+    // `!rankingExpression.deterministic` so the deterministic path's plan shape is unchanged.
+    val left = LocalRelation($"a".int, $"b".int)
+    val right = LocalRelation($"x".int, $"y".int)
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 1,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    val rewritten = RewriteNearestByJoin(query.analyze)
+    val agg = rewritten.collect { case a: Aggregate => a }.head
+    assert(agg.child.isInstanceOf[Join],
+      s"expected Aggregate's child to be the Join directly when ranking is deterministic, " +
+        s"got ${agg.child.getClass.getSimpleName}")
+  }
+
+  test("output declares both left- and right-side attributes nullable") {
+    // The rewrite carries left columns through `First` aggregates (always nullable result type)
+    // and right columns through `Inline` over `MaxMinByK`'s `ArrayType(.., containsNull = true)`
+    // (every struct field becomes nullable). NearestByJoin.output must reflect both widenings
+    // so the analyzed schema matches the optimized plan; otherwise cached / written outputs
+    // would advertise a stricter nullability than the data actually carries.
+    val left = LocalRelation(
+      AttributeReference("a", IntegerType, nullable = false)(),
+      AttributeReference("b", IntegerType, nullable = false)())
+    val right = LocalRelation(
+      AttributeReference("x", IntegerType, nullable = false)(),
+      AttributeReference("y", IntegerType, nullable = false)())
+    val query = NearestByJoin(
+      left, right, Inner, approx = true, numResults = 1,
+      rankingExpression = left.output(0) + right.output(0),
+      direction = NearestBySimilarity)
+
+    assert(left.output.forall(!_.nullable),
+      "preconditions: left input attributes should start non-nullable")
+    assert(right.output.forall(!_.nullable),
+      "preconditions: right input attributes should start non-nullable")
+    assert(query.output.forall(_.nullable),
+      "NearestByJoin.output should declare every attribute nullable, regardless of the " +
+        "nullability of the underlying inputs")
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
index c45a761353c85..dca1d503e3fdd 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer
 import org.apache.spark.sql.catalyst.QueryPlanningTracker
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{Cast, IsNull, ListQuery, Not}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Exists, IsNull, ListQuery, Literal, Not}
 import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, LeftSemi, PlanTest}
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -96,4 +96,42 @@ class RewriteSubquerySuite extends PlanTest {
       .select($"exists".as("(sum(col2) IN (listquery()))")).analyze
     comparePlans(optimized, correctAnswer)
   }
+
+  test("SPARK-57005: No None.get when correlated predicates are eliminated") {
+    // When BooleanSimplification in PullupCorrelatedPredicates eliminates all correlated
+    // predicates (e.g., FALSE AND correlated_pred -> FALSE), the Exists node ends up with
+    // outerAttrs non-empty but joinCond empty. RewritePredicateSubquery must handle this.
+    object OptimizeWithPullup extends RuleExecutor[LogicalPlan] {
+      val batches =
+        Batch("Pullup Correlated Expressions", Once,
+          PullupCorrelatedPredicates) ::
+        Batch("Rewrite Subquery", FixedPoint(1),
+          RewritePredicateSubquery,
+          PruneFilters,
+          PropagateEmptyRelation,
+          ColumnPruning,
+          CollapseProject,
+          RemoveNoopOperators) :: Nil
+    }
+
+    val outer = LocalRelation($"a".int, $"b".int)
+    val inner = LocalRelation($"x".int, $"y".int)
+
+    // NOT EXISTS with FALSE AND correlated_pred: subquery is always empty,
+    // so NOT EXISTS is always true and the filter is eliminated.
+    // Since outer is an empty LocalRelation, the result is also empty.
+    val notExistsQuery = outer.where(
+      Not(Exists(inner.where(Literal.FalseLiteral && $"a" === $"x")))).select($"a")
+    val notExistsOptimized = OptimizeWithPullup.execute(notExistsQuery.analyze)
+    val notExistsExpected = LocalRelation(notExistsQuery.analyze.output).analyze
+    comparePlans(notExistsOptimized, notExistsExpected)
+
+    // EXISTS with FALSE AND correlated_pred: subquery is always empty,
+    // so EXISTS is always false and no rows pass the filter.
+    val existsQuery = outer.where(
+      Exists(inner.where(Literal.FalseLiteral && $"a" === $"x"))).select($"a")
+    val existsOptimized = OptimizeWithPullup.execute(existsQuery.analyze)
+    val existsExpected = LocalRelation(existsQuery.analyze.output).analyze
+    comparePlans(existsOptimized, existsExpected)
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimpleTestOptimizer.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimpleTestOptimizer.scala
index 007a2b3fd0589..b36cfb930c56e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimpleTestOptimizer.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimpleTestOptimizer.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, EmptyTableFunctionRegistry, FakeV2SessionCatalog}
 import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
-import org.apache.spark.sql.connector.catalog.CatalogManager
+import org.apache.spark.sql.connector.catalog.DefaultCatalogManager
 
 /**
  * An optimizer used in test code.
@@ -29,6 +29,6 @@ import org.apache.spark.sql.connector.catalog.CatalogManager
 object SimpleTestOptimizer extends SimpleTestOptimizer
 
 class SimpleTestOptimizer extends Optimizer(
-  new CatalogManager(
+  new DefaultCatalogManager(
     FakeV2SessionCatalog,
     new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, EmptyTableFunctionRegistry)))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 1ac417ddc9376..1db22037d31f8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -1790,26 +1790,28 @@ class DDLParserSuite extends AnalysisTest {
           Literal(5))))
   }
 
-  test("insert table: REPLACE WHERE with tableAlias and BY NAME") {
-    parseCompare(
-      "INSERT INTO testcat.ns1.ns2.tbl AS t BY NAME REPLACE WHERE a > 5 SELECT * FROM source",
-      OverwriteByExpression.byName(
-        UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")),
-        Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))),
-        GreaterThan(
-          UnresolvedAttribute("a"),
-          Literal(5))))
+  test("insert table: REPLACE WHERE rejects tableAlias with BY NAME") {
+    val sql =
+      "INSERT INTO testcat.ns1.ns2.tbl AS t BY NAME REPLACE WHERE a > 5 SELECT * FROM source"
+    checkError(
+      exception = parseException(sql),
+      condition = "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED",
+      parameters = Map.empty,
+      context = ExpectedContext(
+        fragment = "INSERT INTO testcat.ns1.ns2.tbl AS t BY NAME REPLACE WHERE a > 5",
+        start = 0, stop = 63))
   }
 
-  test("insert table: REPLACE WHERE with tableAlias without BY NAME") {
-    parseCompare(
-      "INSERT INTO testcat.ns1.ns2.tbl AS t REPLACE WHERE a > 5 SELECT * FROM source",
-      OverwriteByExpression.byPosition(
-        UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")),
-        Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))),
-        GreaterThan(
-          UnresolvedAttribute("a"),
-          Literal(5))))
+  test("insert table: REPLACE WHERE rejects tableAlias without BY NAME") {
+    val sql =
+      "INSERT INTO testcat.ns1.ns2.tbl AS t REPLACE WHERE a > 5 SELECT * FROM source"
+    checkError(
+      exception = parseException(sql),
+      condition = "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED",
+      parameters = Map.empty,
+      context = ExpectedContext(
+        fragment = "INSERT INTO testcat.ns1.ns2.tbl AS t REPLACE WHERE a > 5",
+        start = 0, stop = 55))
   }
 
   for {
@@ -2761,7 +2763,7 @@ class DDLParserSuite extends AnalysisTest {
     comparePlans(
       parsePlan("CACHE TABLE t AS SELECT * FROM testData"),
       CacheTableAsSelect(
-        "t",
+        Literal("t"),
         Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData"))),
         "SELECT * FROM testData",
         false,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index edaa7aee5cabb..b17634e0b56b9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.util.{EvaluateUnresolvedInlineTable, IntervalUtils}
-import org.apache.spark.sql.connector.catalog.{ChangelogInfo, ChangelogRange}
+import org.apache.spark.sql.connector.catalog.{ChangelogContext, ChangelogRange}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{Decimal, DecimalType, IntegerType, LongType, StringType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -826,6 +826,145 @@ class PlanParserSuite extends AnalysisTest {
     )
   }
 
+  test("nearest-by join") {
+    assertEqual(
+      "select * from t join u approx nearest 5 by similarity t.a + u.a",
+      NearestByJoin(
+        table("t"),
+        table("u"),
+        Inner,
+        approx = true,
+        numResults = 5,
+        rankingExpression = $"t.a" + $"u.a",
+        direction = NearestBySimilarity).select(star()))
+
+    assertEqual(
+      "select * from t inner join u exact nearest 3 by distance t.a - u.a",
+      NearestByJoin(
+        table("t"),
+        table("u"),
+        Inner,
+        approx = false,
+        numResults = 3,
+        rankingExpression = $"t.a" - $"u.a",
+        direction = NearestByDistance).select(star()))
+
+    assertEqual(
+      "select * from t left outer join u approx nearest by similarity t.a + u.a",
+      NearestByJoin(
+        table("t"),
+        table("u"),
+        LeftOuter,
+        approx = true,
+        numResults = 1,
+        rankingExpression = $"t.a" + $"u.a",
+        direction = NearestBySimilarity).select(star()))
+
+    // Unsupported join type.
+    val sqlRightOuter =
+      "select * from t right outer join u approx nearest 1 by similarity t.a"
+    checkError(
+      exception = parseException(sqlRightOuter),
+      condition = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+      parameters = Map(
+        "joinType" -> "RIGHT OUTER",
+        "supported" -> "'INNER', 'LEFT OUTER'"),
+      context = ExpectedContext(
+        fragment = "right outer join u approx nearest 1 by similarity t.a",
+        start = 16,
+        stop = 68))
+
+    val sqlFullOuter =
+      "select * from t full outer join u approx nearest 1 by similarity t.a"
+    checkError(
+      exception = parseException(sqlFullOuter),
+      condition = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+      parameters = Map(
+        "joinType" -> "FULL OUTER",
+        "supported" -> "'INNER', 'LEFT OUTER'"),
+      context = ExpectedContext(
+        fragment = "full outer join u approx nearest 1 by similarity t.a",
+        start = 16,
+        stop = 67))
+
+    val sqlCross =
+      "select * from t cross join u approx nearest 1 by similarity t.a"
+    checkError(
+      exception = parseException(sqlCross),
+      condition = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+      parameters = Map(
+        "joinType" -> "CROSS",
+        "supported" -> "'INNER', 'LEFT OUTER'"),
+      context = ExpectedContext(
+        fragment = "cross join u approx nearest 1 by similarity t.a",
+        start = 16,
+        stop = 62))
+
+    // LATERAL + NEAREST BY not allowed.
+    val sqlLateral =
+      "select * from t join lateral (select * from u) uu approx nearest 1 by similarity 1"
+    checkError(
+      exception = parseException(sqlLateral),
+      condition = "UNSUPPORTED_FEATURE.LATERAL_JOIN_NEAREST_BY",
+      parameters = Map.empty,
+      context = ExpectedContext(
+        fragment = "join lateral (select * from u) uu approx nearest 1 by similarity 1",
+        start = 16,
+        stop = 81))
+
+    // num_results out of range.
+    val sqlTooSmall =
+      "select * from t join u approx nearest 0 by similarity t.a"
+    checkError(
+      exception = parseException(sqlTooSmall),
+      condition = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+      parameters = Map("numResults" -> "0", "min" -> "1", "max" -> "100000"),
+      context = ExpectedContext(
+        fragment = "join u approx nearest 0 by similarity t.a",
+        start = 16,
+        stop = 56))
+
+    val sqlTooLarge =
+      "select * from t join u approx nearest 100001 by distance t.a"
+    checkError(
+      exception = parseException(sqlTooLarge),
+      condition = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+      parameters = Map("numResults" -> "100001", "min" -> "1", "max" -> "100000"),
+      context = ExpectedContext(
+        fragment = "join u approx nearest 100001 by distance t.a",
+        start = 16,
+        stop = 59))
+
+    // Literal that overflows Long (>19 digits) should surface as the standard out-of-range
+    // error, not an unwrapped NumberFormatException.
+    val sqlOverflow =
+      "select * from t join u approx nearest 99999999999999999999 by distance t.a"
+    checkError(
+      exception = parseException(sqlOverflow),
+      condition = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+      parameters = Map(
+        "numResults" -> "99999999999999999999",
+        "min" -> "1",
+        "max" -> "100000"),
+      context = ExpectedContext(
+        fragment = "join u approx nearest 99999999999999999999 by distance t.a",
+        start = 16,
+        stop = 73))
+  }
+
+  test("nearest-by keywords are non-reserved (usable as identifiers)") {
+    // The five new keywords (APPROX, DISTANCE, EXACT, NEAREST, SIMILARITY) must remain
+    // non-reserved so they can continue to be used as column or table identifiers.
+    Seq("approx", "distance", "exact", "nearest", "similarity").foreach { kw =>
+      // As a column identifier in the SELECT list.
+      parsePlan(s"select $kw from t")
+      // As a table identifier in the FROM clause.
+      parsePlan(s"select * from $kw")
+    }
+    // All five together in a single SELECT list.
+    parsePlan("select approx, distance, exact, nearest, similarity from t")
+  }
+
   test("sampled relations") {
     val sql = "select * from t"
     assertEqual(s"$sql tablesample(100 rows)",
@@ -885,6 +1024,207 @@ class PlanParserSuite extends AnalysisTest {
         stop = 65))
   }
 
+  test("SPARK-55978: TABLESAMPLE SYSTEM and BERNOULLI - basic parsing") {
+    val sql = "select * from t"
+    // SYSTEM produces SampleMethod.System
+    assertEqual(
+      s"$sql tablesample system (43 percent) as x",
+      Sample(0, .43d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.System).select(star()))
+    // BERNOULLI produces SampleMethod.Bernoulli
+    assertEqual(
+      s"$sql tablesample bernoulli (43 percent) as x",
+      Sample(0, .43d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.Bernoulli).select(star()))
+    // No qualifier defaults to Bernoulli (backward compat)
+    assertEqual(
+      s"$sql tablesample(43 percent) as x",
+      Sample(0, .43d, withReplacement = false, None,
+        table("t").as("x")).select(star()))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM - case insensitivity") {
+    val sql = "select * from t"
+    // Keywords are case-insensitive
+    assertEqual(
+      s"$sql TABLESAMPLE SYSTEM (43 PERCENT) as x",
+      Sample(0, .43d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.System).select(star()))
+    assertEqual(
+      s"$sql TabLeSaMpLe SyStEm (43 PeRcEnT) as x",
+      Sample(0, .43d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.System).select(star()))
+    assertEqual(
+      s"$sql TABLESAMPLE BERNOULLI (43 PERCENT) as x",
+      Sample(0, .43d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.Bernoulli).select(star()))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM - boundary fractions") {
+    val sql = "select * from t"
+    // 0 PERCENT
+    assertEqual(
+      s"$sql tablesample system (0 percent) as x",
+      Sample(0, 0d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.System).select(star()))
+    // 100 PERCENT
+    assertEqual(
+      s"$sql tablesample system (100 percent) as x",
+      Sample(0, 1d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.System).select(star()))
+    // Fractional percent
+    assertEqual(
+      s"$sql tablesample system (0.1 percent) as x",
+      Sample(0, 0.001d, withReplacement = false, None,
+        table("t").as("x"), SampleMethod.System).select(star()))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM - unsupported sample methods") {
+    val sql = "select * from t"
+    // SYSTEM + ROWS -> error
+    checkError(
+      exception = parseException(s"$sql tablesample system (100 rows)"),
+      condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_SAMPLE_METHOD",
+      sqlState = "0A000",
+      parameters = Map("sampleMethod" -> "ROWS"),
+      context = ExpectedContext(
+        fragment = "tablesample system (100 rows)",
+        start = 16,
+        stop = 44))
+    // SYSTEM + BYTES -> error
+    checkError(
+      exception = parseException(s"$sql tablesample system (300M)"),
+      condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_SAMPLE_METHOD",
+      sqlState = "0A000",
+      parameters = Map("sampleMethod" -> "BYTES"),
+      context = ExpectedContext(
+        fragment = "tablesample system (300M)",
+        start = 16,
+        stop = 40))
+    // SYSTEM + BUCKET -> error
+    checkError(
+      exception = parseException(s"$sql tablesample system (bucket 4 out of 10)"),
+      condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_SAMPLE_METHOD",
+      sqlState = "0A000",
+      parameters = Map("sampleMethod" -> "BUCKET"),
+      context = ExpectedContext(
+        fragment = "tablesample system (bucket 4 out of 10)",
+        start = 16,
+        stop = 54))
+    // SYSTEM + BUCKET ON colname -> error
+    checkError(
+      exception = parseException(s"$sql tablesample system (bucket 4 out of 10 on x)"),
+      condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_SAMPLE_METHOD",
+      sqlState = "0A000",
+      parameters = Map("sampleMethod" -> "BUCKET"),
+      context = ExpectedContext(
+        fragment = "tablesample system (bucket 4 out of 10 on x)",
+        start = 16,
+        stop = 59))
+    // SYSTEM + BUCKET ON function -> error
+    checkError(
+      exception = parseException(s"$sql tablesample system (bucket 3 out of 32 on rand())"),
+      condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_SAMPLE_METHOD",
+      sqlState = "0A000",
+      parameters = Map("sampleMethod" -> "BUCKET"),
+      context = ExpectedContext(
+        fragment = "tablesample system (bucket 3 out of 32 on rand())",
+        start = 16,
+        stop = 64))
+  }
+
+  test("SPARK-55978: TABLESAMPLE BERNOULLI - REPEATABLE is supported") {
+    assertEqual(
+      "select * from t tablesample bernoulli (43 percent) repeatable (123) as x",
+      Sample(0, .43d, withReplacement = false, 123L,
+        table("t").as("x"), SampleMethod.Bernoulli).select(star()))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM - REPEATABLE not supported") {
+    val sql = "select * from t"
+    checkError(
+      exception = parseException(s"$sql tablesample system (43 percent) repeatable (123)"),
+      condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_REPEATABLE",
+      sqlState = "0A000",
+      context = ExpectedContext(
+        fragment = "tablesample system (43 percent) repeatable (123)",
+        start = 16,
+        stop = 63))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM - fraction out of range") {
+    val sql = "select * from t"
+    // > 100 PERCENT
+    checkError(
+      exception = parseException(s"$sql tablesample system (150 percent) as x"),
+      condition = "_LEGACY_ERROR_TEMP_0064",
+      parameters = Map("msg" -> "Sampling fraction (1.5) must be on interval [0, 1]"),
+      context = ExpectedContext(
+        fragment = "tablesample system (150 percent)",
+        start = 16,
+        stop = 47))
+    // Negative PERCENT
+    checkError(
+      exception = parseException(s"$sql tablesample system (-10 percent) as x"),
+      condition = "_LEGACY_ERROR_TEMP_0064",
+      parameters = Map("msg" -> "Sampling fraction (-0.1) must be on interval [0, 1]"),
+      context = ExpectedContext(
+        fragment = "tablesample system (-10 percent)",
+        start = 16,
+        stop = 47))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM and BERNOULLI as identifiers") {
+    // SYSTEM usable as column name (nonReserved)
+    assertEqual("SELECT system FROM t",
+      table("t").select($"system"))
+    // BERNOULLI usable as column name
+    assertEqual("SELECT bernoulli FROM t",
+      table("t").select($"bernoulli"))
+    // Usable as table alias
+    assertEqual("SELECT * FROM t system",
+      table("t").as("system").select(star()))
+    assertEqual("SELECT * FROM t bernoulli",
+      table("t").as("bernoulli").select(star()))
+    // SYSTEM as table name with default (Bernoulli) TABLESAMPLE
+    assertEqual("SELECT * FROM system TABLESAMPLE(10 PERCENT) AS x",
+      Sample(0, .1d, withReplacement = false, None,
+        table("system").as("x")).select(star()))
+    // SYSTEM as table name with TABLESAMPLE SYSTEM qualifier
+    assertEqual("SELECT * FROM system TABLESAMPLE SYSTEM (10 PERCENT) AS x",
+      Sample(0, .1d, withReplacement = false, None,
+        table("system").as("x"), SampleMethod.System).select(star()))
+    // SYSTEM as both table name and alias with TABLESAMPLE
+    assertEqual("SELECT * FROM system TABLESAMPLE(10 PERCENT) system",
+      Sample(0, .1d, withReplacement = false, None,
+        table("system").as("system")).select(star()))
+    // BERNOULLI as table name with TABLESAMPLE BERNOULLI qualifier
+    assertEqual("SELECT * FROM bernoulli TABLESAMPLE BERNOULLI (10 PERCENT) AS x",
+      Sample(0, .1d, withReplacement = false, None,
+        table("bernoulli").as("x"), SampleMethod.Bernoulli).select(star()))
+    // SYSTEM as table name with TABLESAMPLE BERNOULLI (cross-keyword)
+    assertEqual("SELECT * FROM system TABLESAMPLE BERNOULLI (10 PERCENT) AS x",
+      Sample(0, .1d, withReplacement = false, None,
+        table("system").as("x"), SampleMethod.Bernoulli).select(star()))
+    // BERNOULLI as both table name and alias with TABLESAMPLE
+    assertEqual("SELECT * FROM bernoulli TABLESAMPLE(10 PERCENT) bernoulli",
+      Sample(0, .1d, withReplacement = false, None,
+        table("bernoulli").as("bernoulli")).select(star()))
+    // Schema-qualified SYSTEM table name with TABLESAMPLE SYSTEM
+    assertEqual("SELECT * FROM mydb.system TABLESAMPLE SYSTEM (10 PERCENT) AS x",
+      Sample(0, .1d, withReplacement = false, None,
+        table("mydb", "system").as("x"), SampleMethod.System).select(star()))
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM - subquery and join contexts") {
+    // SYSTEM sample in subquery
+    assertEqual(
+      "SELECT * FROM (SELECT * FROM t TABLESAMPLE SYSTEM (50 PERCENT)) sub",
+      Sample(0, .5d, withReplacement = false, None,
+        table("t"), SampleMethod.System)
+        .select(star()).as("sub").select(star()))
+  }
+
   test("sub-query") {
     val plan = table("t0").select($"id")
     assertEqual("select id from (t0)", plan)
@@ -1854,14 +2194,14 @@ class PlanParserSuite extends AnalysisTest {
         endInclusive: Boolean = true): RelationChanges = {
       RelationChanges(
         UnresolvedRelation(Seq("a", "b", "c")),
-        new ChangelogInfo(
+        new ChangelogContext(
           new ChangelogRange.VersionRange(
             startVersion,
             endVersion.map(java.util.Optional.of[String])
               .getOrElse(java.util.Optional.empty[String]),
             startInclusive,
             endInclusive),
-          ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS,
+          ChangelogContext.DeduplicationMode.DROP_CARRYOVERS,
           false))
     }
 
@@ -1922,7 +2262,7 @@ class PlanParserSuite extends AnalysisTest {
         case rc: RelationChanges => rc
         case sa: SubqueryAlias => sa.child.asInstanceOf[RelationChanges]
       }
-      changes.changelogInfo.range().asInstanceOf[ChangelogRange.TimestampRange]
+      changes.changelogContext.range().asInstanceOf[ChangelogRange.TimestampRange]
     }
 
     // Basic timestamp range
@@ -1960,54 +2300,54 @@ class PlanParserSuite extends AnalysisTest {
   }
 
   test("CHANGES clause - with options") {
-    def assertChangelogInfo(sql: String): ChangelogInfo = {
+    def assertChangelogContext(sql: String): ChangelogContext = {
       val plan = parsePlan(sql)
       val project = plan.asInstanceOf[Project]
       val changes = project.child match {
         case rc: RelationChanges => rc
         case sa: SubqueryAlias => sa.child.asInstanceOf[RelationChanges]
       }
-      changes.changelogInfo
+      changes.changelogContext
     }
 
     // Default: DROP_CARRYOVERS and computeUpdates = false
-    val info1 = assertChangelogInfo(
+    val info1 = assertChangelogContext(
       "SELECT * FROM a.b.c CHANGES FROM VERSION 10 TO VERSION 20")
-    assert(info1.deduplicationMode() == ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    assert(info1.deduplicationMode() == ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
     assert(!info1.computeUpdates())
 
     // deduplicationMode = none
-    val info2 = assertChangelogInfo(
+    val info2 = assertChangelogContext(
       "SELECT * FROM a.b.c CHANGES FROM VERSION 10 TO VERSION 20 " +
         "WITH (deduplicationMode = 'none')")
-    assert(info2.deduplicationMode() == ChangelogInfo.DeduplicationMode.NONE)
+    assert(info2.deduplicationMode() == ChangelogContext.DeduplicationMode.NONE)
     assert(!info2.computeUpdates())
 
     // deduplicationMode = netChanges
-    val info3 = assertChangelogInfo(
+    val info3 = assertChangelogContext(
       "SELECT * FROM a.b.c CHANGES FROM VERSION 10 TO VERSION 20 " +
         "WITH (deduplicationMode = 'netChanges')")
-    assert(info3.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES)
+    assert(info3.deduplicationMode() == ChangelogContext.DeduplicationMode.NET_CHANGES)
 
     // computeUpdates = true
-    val info4 = assertChangelogInfo(
+    val info4 = assertChangelogContext(
       "SELECT * FROM a.b.c CHANGES FROM VERSION 10 TO VERSION 20 " +
         "WITH (computeUpdates = 'true')")
-    assert(info4.deduplicationMode() == ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    assert(info4.deduplicationMode() == ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
     assert(info4.computeUpdates())
 
     // Both options together
-    val info5 = assertChangelogInfo(
+    val info5 = assertChangelogContext(
       "SELECT * FROM a.b.c CHANGES FROM VERSION 10 TO VERSION 20 " +
         "WITH (deduplicationMode = 'none', computeUpdates = 'true')")
-    assert(info5.deduplicationMode() == ChangelogInfo.DeduplicationMode.NONE)
+    assert(info5.deduplicationMode() == ChangelogContext.DeduplicationMode.NONE)
     assert(info5.computeUpdates())
 
     // Case-insensitive deduplicationMode value
-    val info6 = assertChangelogInfo(
+    val info6 = assertChangelogContext(
       "SELECT * FROM a.b.c CHANGES FROM VERSION 10 TO VERSION 20 " +
         "WITH (deduplicationMode = 'DROPCARRYOVERS')")
-    assert(info6.deduplicationMode() == ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    assert(info6.deduplicationMode() == ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
   }
 
   test("CHANGES clause - invalid deduplicationMode") {
@@ -2038,10 +2378,10 @@ class PlanParserSuite extends AnalysisTest {
       Project(Seq(UnresolvedStar(None)),
         RelationChanges(
           UnresolvedRelation(Seq("my_table")),
-          new ChangelogInfo(
+          new ChangelogContext(
             new ChangelogRange.VersionRange(
               "1", java.util.Optional.empty[String], true, true),
-            ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS,
+            ChangelogContext.DeduplicationMode.DROP_CARRYOVERS,
             false))))
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/StreamRelationParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/StreamRelationParserSuite.scala
index 880431b189a7d..61e193bf54c7b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/StreamRelationParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/StreamRelationParserSuite.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.AliasIdentifier
 import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, NamedStreamingRelation, RelationChanges, UnresolvedRelation, UnresolvedStar, UnresolvedTableValuedFunction}
 import org.apache.spark.sql.catalyst.plans.logical.{Project, SubqueryAlias}
 import org.apache.spark.sql.catalyst.streaming.{Unassigned, UserProvided}
-import org.apache.spark.sql.connector.catalog.{ChangelogInfo, ChangelogRange}
+import org.apache.spark.sql.connector.catalog.{ChangelogContext, ChangelogRange}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 class StreamRelationParserSuite extends AnalysisTest {
@@ -594,17 +594,18 @@ class StreamRelationParserSuite extends AnalysisTest {
     val plan = parsePlan("SELECT * FROM STREAM t CHANGES")
     val relationChanges = plan.collect { case rc: RelationChanges => rc }
     assert(relationChanges.size == 1)
-    assert(relationChanges.head.changelogInfo.range().isInstanceOf[ChangelogRange.UnboundedRange])
-    assert(relationChanges.head.changelogInfo.deduplicationMode() ==
-      ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
-    assert(!relationChanges.head.changelogInfo.computeUpdates())
+    assert(relationChanges.head.changelogContext.range()
+      .isInstanceOf[ChangelogRange.UnboundedRange])
+    assert(relationChanges.head.changelogContext.deduplicationMode() ==
+      ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
+    assert(!relationChanges.head.changelogContext.computeUpdates())
   }
 
   test("STREAM t CHANGES FROM VERSION") {
     val plan = parsePlan("SELECT * FROM STREAM t CHANGES FROM VERSION 1")
     val relationChanges = plan.collect { case rc: RelationChanges => rc }
     assert(relationChanges.size == 1)
-    val range = relationChanges.head.changelogInfo.range()
+    val range = relationChanges.head.changelogContext.range()
       .asInstanceOf[ChangelogRange.VersionRange]
     assert(range.startingVersion() == "1")
     assert(!range.endingVersion().isPresent)
@@ -615,7 +616,7 @@ class StreamRelationParserSuite extends AnalysisTest {
     val plan = parsePlan("SELECT * FROM STREAM t CHANGES FROM VERSION 5 EXCLUSIVE")
     val relationChanges = plan.collect { case rc: RelationChanges => rc }
     assert(relationChanges.size == 1)
-    val range = relationChanges.head.changelogInfo.range()
+    val range = relationChanges.head.changelogContext.range()
       .asInstanceOf[ChangelogRange.VersionRange]
     assert(range.startingVersion() == "5")
     assert(!range.startingBoundInclusive())
@@ -625,7 +626,7 @@ class StreamRelationParserSuite extends AnalysisTest {
     val plan = parsePlan("SELECT * FROM STREAM t CHANGES FROM TIMESTAMP '2026-01-01'")
     val relationChanges = plan.collect { case rc: RelationChanges => rc }
     assert(relationChanges.size == 1)
-    assert(relationChanges.head.changelogInfo.range()
+    assert(relationChanges.head.changelogContext.range()
       .isInstanceOf[ChangelogRange.TimestampRange])
   }
 
@@ -647,9 +648,9 @@ class StreamRelationParserSuite extends AnalysisTest {
         "WITH (deduplicationMode = 'none', computeUpdates = 'true')")
     val relationChanges = plan.collect { case rc: RelationChanges => rc }
     assert(relationChanges.size == 1)
-    assert(relationChanges.head.changelogInfo.deduplicationMode() ==
-      ChangelogInfo.DeduplicationMode.NONE)
-    assert(relationChanges.head.changelogInfo.computeUpdates())
+    assert(relationChanges.head.changelogContext.deduplicationMode() ==
+      ChangelogContext.DeduplicationMode.NONE)
+    assert(relationChanges.head.changelogContext.computeUpdates())
   }
 
   test("STREAM t CHANGES - error: subquery in timestamp") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/transactions/TransactionUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/transactions/TransactionUtilsSuite.scala
new file mode 100644
index 0000000000000..c6932f416e5e6
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/transactions/TransactionUtilsSuite.scala
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.transactions
+
+import org.apache.spark.{SparkException, SparkFunSuite}
+import org.apache.spark.sql.connector.catalog.{CatalogPlugin, TransactionalCatalogPlugin}
+import org.apache.spark.sql.connector.catalog.transactions.{Transaction, TransactionInfo}
+import org.apache.spark.sql.connector.read.Scan
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+class TransactionUtilsSuite extends SparkFunSuite {
+  val testCatalogName = "test_catalog"
+
+  // --- Helpers ---------------------------------------------------------------
+  private def mockCatalog(catalogName: String): CatalogPlugin = new CatalogPlugin {
+    override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = ()
+    override def name(): String = catalogName
+  }
+
+  private val emptyFunction = () => ()
+  private class TestTransaction(
+      catalogName: String,
+      onCommit: () => Unit = emptyFunction,
+      onAbort: () => Unit = emptyFunction,
+      onClose: () => Unit = emptyFunction) extends Transaction {
+    var committed = false
+    var aborted = false
+    var closed = false
+
+    override def catalog(): CatalogPlugin = mockCatalog(catalogName)
+    override def commit(): Unit = { committed = true; onCommit() }
+    override def abort(): Unit = { aborted = true; onAbort() }
+    override def close(): Unit = { closed = true; onClose() }
+    override def registerScans(scans: Array[Scan]): Boolean = false
+  }
+
+  private def mockTransactionalCatalog(
+      catalogName: String,
+      txnCatalogName: String = null): TransactionalCatalogPlugin = {
+    val resolvedTxnCatalogName = Option(txnCatalogName).getOrElse(catalogName)
+    new TransactionalCatalogPlugin {
+      override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = ()
+      override def name(): String = catalogName
+      override def beginTransaction(info: TransactionInfo): Transaction =
+        new TestTransaction(resolvedTxnCatalogName)
+    }
+  }
+
+  // --- Commit ----------------------------------------------------------------
+  test("commit: calls commit then close") {
+    val txn = new TestTransaction(testCatalogName)
+    TransactionUtils.commit(txn)
+    assert(txn.committed)
+    assert(txn.closed)
+  }
+
+  test("commit: close is called even if commit fails") {
+    val txn = new TestTransaction(
+      testCatalogName, onCommit = () => throw new RuntimeException("commit failed"))
+    intercept[RuntimeException] { TransactionUtils.commit(txn) }
+    assert(txn.closed)
+  }
+
+  // --- Abort -----------------------------------------------------------------
+  test("abort: calls abort then close") {
+    val txn = new TestTransaction(testCatalogName)
+    TransactionUtils.abort(txn)
+    assert(txn.aborted)
+    assert(txn.closed)
+  }
+
+  test("abort: close is called even if abort fails") {
+    val txn = new TestTransaction(testCatalogName,
+      onAbort = () => throw new RuntimeException("abort failed"))
+    intercept[RuntimeException] { TransactionUtils.abort(txn) }
+    assert(txn.closed)
+  }
+
+  // --- Begin Transaction -----------------------------------------------------
+  test("beginTransaction: returns transaction when catalog names match") {
+    val catalog = mockTransactionalCatalog(testCatalogName)
+    val txn = TransactionUtils.beginTransaction(catalog)
+    assert(txn.catalog().name() == testCatalogName)
+  }
+
+  test("beginTransaction: fails when transaction catalog name does not match") {
+    val catalog = mockTransactionalCatalog(catalogName = testCatalogName, txnCatalogName = "other")
+    val e = intercept[SparkException] {
+      TransactionUtils.beginTransaction(catalog)
+    }
+    assert(e.getMessage.contains("other"))
+    assert(e.getMessage.contains(testCatalogName))
+  }
+
+  test("beginTransaction: aborts and closes transaction on catalog name mismatch") {
+    var aborted = false
+    var closed = false
+    val catalog = new TransactionalCatalogPlugin {
+      override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = ()
+      override def name(): String = testCatalogName
+      override def beginTransaction(info: TransactionInfo): Transaction =
+        new TestTransaction(
+          "other",
+          onAbort = () => { aborted = true },
+          onClose = () => { closed = true })
+    }
+    intercept[SparkException] { TransactionUtils.beginTransaction(catalog) }
+    assert(aborted)
+    assert(closed)
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 605a6ffafe8c4..4aa03d9f8daa0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
 
 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
-import java.time.{DateTimeException, Instant, LocalDate, LocalDateTime, LocalTime, ZoneId}
+import java.time.{DateTimeException, Instant, LocalDate, LocalDateTime, LocalTime, ZoneId, ZoneOffset}
 import java.util.Locale
 import java.util.concurrent.TimeUnit
 
@@ -695,6 +695,8 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
       withDefaultTimeZone(zid) {
         val inputTS = DateTimeUtils.stringToTimestamp(
           UTF8String.fromString("1769-10-17T17:10:02.123456"), defaultZoneId)
+        testTrunc(DateTimeUtils.TRUNC_TO_DAY, "1769-10-17T00:00:00", inputTS.get, zid)
+        testTrunc(DateTimeUtils.TRUNC_TO_HOUR, "1769-10-17T17:00:00", inputTS.get, zid)
         testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "1769-10-17T17:10:00", inputTS.get, zid)
         testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "1769-10-17T17:10:02", inputTS.get, zid)
         testTrunc(DateTimeUtils.TRUNC_TO_MILLISECOND, "1769-10-17T17:10:02.123", inputTS.get, zid)
@@ -766,6 +768,97 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
     }
   }
 
+  test("truncTimestamp with sub-hour zone offsets") {
+    // Asia/Kolkata (+05:30) and Asia/Kathmandu (+05:45) are not aligned to HOUR in UTC.
+    // The fast path applies the offset as part of its arithmetic, so HOUR/DAY truncation
+    // produces the correct local-aligned result without needing the slow path.
+    val kolkata = getZoneId("Asia/Kolkata")
+    val ts = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-01-15T09:42:17.123456+05:30"), kolkata).get
+    val expectedHour = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-01-15T09:00:00+05:30"), kolkata).get
+    assert(DateTimeUtils.truncTimestamp(ts, DateTimeUtils.TRUNC_TO_HOUR, kolkata) === expectedHour)
+    val expectedDay = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-01-15T00:00:00+05:30"), kolkata).get
+    assert(DateTimeUtils.truncTimestamp(ts, DateTimeUtils.TRUNC_TO_DAY, kolkata) === expectedDay)
+
+    val kathmandu = getZoneId("Asia/Kathmandu")
+    val ts2 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-01-15T09:42:17.123456+05:45"), kathmandu).get
+    val expectedHour2 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-01-15T09:00:00+05:45"), kathmandu).get
+    assert(DateTimeUtils.truncTimestamp(
+      ts2, DateTimeUtils.TRUNC_TO_HOUR, kathmandu) === expectedHour2)
+  }
+
+  test("truncTimestamp across DST transitions") {
+    val la = getZoneId("America/Los_Angeles")
+    // Spring-forward in LA: local 02:00-02:59 doesn't exist on 2024-03-10
+    // (01:59 PST jumps to 03:00 PDT). Pick 03:30 PDT just after the transition
+    // so the HOUR/DAY truncation candidate falls into the pre-transition window.
+    val postSpring = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-03-10T03:30:00-07:00"), la).get
+    val expectedHour = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-03-10T03:00:00-07:00"), la).get
+    assert(DateTimeUtils.truncTimestamp(postSpring, DateTimeUtils.TRUNC_TO_HOUR, la)
+      === expectedHour)
+    val expectedDay = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-03-10T00:00:00-08:00"), la).get
+    assert(DateTimeUtils.truncTimestamp(postSpring, DateTimeUtils.TRUNC_TO_DAY, la)
+      === expectedDay)
+
+    // Fall-back in LA: 2024-11-03 01:30 occurs twice. Truncation to HOUR/DAY should
+    // produce the same wall-clock boundary as the slow path regardless.
+    val postFall = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-11-03T01:30:00-08:00"), la).get
+    val expectedHour2 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-11-03T01:00:00-08:00"), la).get
+    assert(DateTimeUtils.truncTimestamp(postFall, DateTimeUtils.TRUNC_TO_HOUR, la)
+      === expectedHour2)
+    val expectedDay2 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2024-11-03T00:00:00-07:00"), la).get
+    assert(DateTimeUtils.truncTimestamp(postFall, DateTimeUtils.TRUNC_TO_DAY, la)
+      === expectedDay2)
+  }
+
+  test("SPARK-30766/30857: truncTimestamp before the epoch in HOUR/DAY") {
+    val la = getZoneId("America/Los_Angeles")
+    val ts1 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("1960-02-11T00:01:02.123"), la).get
+    val expectedHour1 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("1960-02-11T00:00:00"), la).get
+    assert(DateTimeUtils.truncTimestamp(ts1, DateTimeUtils.TRUNC_TO_HOUR, la) === expectedHour1)
+    val expectedDay1 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("1960-02-11T00:00:00"), la).get
+    assert(DateTimeUtils.truncTimestamp(ts1, DateTimeUtils.TRUNC_TO_DAY, la) === expectedDay1)
+  }
+
+  test("truncTimestamp at America/Sao_Paulo midnight DST gap") {
+    // 2018-11-04 was the last Brazilian DST start; the offset jumped from -3
+    // to -2 at exactly midnight local. The local times 00:00-00:59 on this
+    // date did not exist, so DAY truncation must resolve the gap forward to
+    // 01:00 BRST. Exercises the fast path's DST-fallback for midnight gaps.
+    val zone = getZoneId("America/Sao_Paulo")
+    val ts = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2018-11-04T12:00:00-02:00"), zone).get
+    val expected = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2018-11-04T01:00:00-02:00"), zone).get
+    assert(DateTimeUtils.truncTimestamp(ts, DateTimeUtils.TRUNC_TO_DAY, zone) === expected)
+  }
+
+  test("truncTimestamp at Pacific/Apia after the 2011 calendar shift") {
+    // Pacific/Apia jumped from UTC-11 to UTC+13 on 2011-12-30, skipping the
+    // local date Dec 30 entirely. Verify that DAY truncation at a clean,
+    // post-transition, no-DST instant (June 2012, austral winter, +13)
+    // resolves to local midnight via the fast path.
+    val zone = getZoneId("Pacific/Apia")
+    val ts = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2012-06-15T13:00:00+13:00"), zone).get
+    val expected = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2012-06-15T00:00:00+13:00"), zone).get
+    assert(DateTimeUtils.truncTimestamp(ts, DateTimeUtils.TRUNC_TO_DAY, zone) === expected)
+  }
+
   test("SPARK-51554: time truncation using timeTrunc") {
     // 01:02:03.400500600
     val input = localTimeToNanos(LocalTime.of(1, 2, 3, 400500600))
@@ -1525,4 +1618,211 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
         assert(result === expected)
       }
     }
+
+  test("timeBucketDTInterval") {
+    val utc = ZoneOffset.UTC
+    // 15-minute bucket with default (epoch) origin
+    assert(timeBucketDTInterval(15 * MICROS_PER_MINUTE,
+      date(2024, 1, 1, 11, 27, 0), 0L, utc) === date(2024, 1, 1, 11, 15, 0))
+    // 1-hour bucket
+    assert(timeBucketDTInterval(MICROS_PER_HOUR,
+      date(2024, 1, 1, 11, 27, 0), 0L, utc) === date(2024, 1, 1, 11, 0, 0))
+    // Custom origin shifts alignment: grid anchored at :05
+    assert(timeBucketDTInterval(MICROS_PER_HOUR,
+      date(2024, 1, 1, 11, 27, 0), date(1970, 1, 1, 0, 5, 0), utc)
+      === date(2024, 1, 1, 11, 5, 0))
+    // 7-day weekly bucket (epoch = Thursday, so buckets run Thu-Wed)
+    assert(timeBucketDTInterval(7 * MICROS_PER_DAY,
+      date(2024, 1, 10, 11, 27, 0), 0L, utc) === date(2024, 1, 4, 0, 0, 0))
+    // ts exactly on boundary returns same instant
+    assert(timeBucketDTInterval(15 * MICROS_PER_MINUTE,
+      date(2024, 1, 1, 11, 15, 0), 0L, utc) === date(2024, 1, 1, 11, 15, 0))
+    // Origin AFTER ts: floorDiv must handle negative diff correctly
+    assert(timeBucketDTInterval(MICROS_PER_HOUR,
+      date(2024, 1, 1, 11, 27, 0), date(2025, 1, 1, 0, 30, 0), utc)
+      === date(2024, 1, 1, 10, 30, 0))
+    // Pre-epoch ts
+    assert(timeBucketDTInterval(MICROS_PER_DAY,
+      date(1969, 12, 31, 23, 30, 0), 0L, utc) === date(1969, 12, 31, 0, 0, 0))
+    // 1-microsecond bucket preserves exact value
+    assert(timeBucketDTInterval(1L,
+      date(2024, 6, 20, 10, 0, 0, 123456), 0L, utc)
+      === date(2024, 6, 20, 10, 0, 0, 123456))
+    // Session-zone bucketing for whole-day buckets in LA: daily buckets land on local
+    // midnight, including across the spring-forward (2024-03-10) and fall-back
+    // (2024-11-03) DST transitions. Origin = local 1970-01-01 00:00 PST = 8h UTC.
+    val la = DateTimeUtils.getZoneId("America/Los_Angeles")
+    val laOrigin = DateTimeUtils.daysToMicros(0, la)
+    // Winter ts: 2024-02-15 18:00 UTC = 2024-02-15 10:00 PST; bucket = 2024-02-15 PST.
+    assert(timeBucketDTInterval(MICROS_PER_DAY,
+      date(2024, 2, 15, 18, 0, 0), laOrigin, la)
+      === DateTimeUtils.daysToMicros(LocalDate.of(2024, 2, 15).toEpochDay.toInt, la))
+    // Summer ts: 2024-07-15 17:00 UTC = 2024-07-15 10:00 PDT; bucket = 2024-07-15 PDT.
+    assert(timeBucketDTInterval(MICROS_PER_DAY,
+      date(2024, 7, 15, 17, 0, 0), laOrigin, la)
+      === DateTimeUtils.daysToMicros(LocalDate.of(2024, 7, 15).toEpochDay.toInt, la))
+    // Spring-forward day (23 UTC hours): ts 2024-03-10 19:00 UTC = 2024-03-10 12:00 PDT.
+    // k_lin lands on the right bucket -- no adjustment needed (DST drift hasn't
+    // accumulated at the bucket-start instant of 2024-03-10 00:00 PST).
+    assert(timeBucketDTInterval(MICROS_PER_DAY,
+      date(2024, 3, 10, 19, 0, 0), laOrigin, la)
+      === DateTimeUtils.daysToMicros(LocalDate.of(2024, 3, 10).toEpochDay.toInt, la))
+    // Fall-back day (Nov 3 in LA spans 25 UTC hours): ts at 2024-11-04 02:00 UTC.
+    // Bucket = local midnight Nov 3 in LA = 2024-11-03 07:00 UTC (PDT, since fall-back
+    // happens at 02:00 and 00:00 is still on the pre-transition offset).
+    assert(timeBucketDTInterval(MICROS_PER_DAY,
+      date(2024, 11, 4, 2, 0, 0), laOrigin, la)
+      === DateTimeUtils.daysToMicros(LocalDate.of(2024, 11, 3).toEpochDay.toInt, la))
+    // Custom origin in LA: 1-day bucket with origin at 06:00 PST. After 60 calendar days,
+    // bucket(60) = 2024-03-15 06:00 PDT, preserving local TOD across spring-forward.
+    val laCustomOrigin = date(2024, 1, 15, 14, 0, 0)  // 14:00 UTC = 2024-01-15 06:00 PST
+    assert(timeBucketDTInterval(MICROS_PER_DAY,
+      date(2024, 3, 15, 13, 0, 0), laCustomOrigin, la)  // 13:00 UTC = 2024-03-15 06:00 PDT
+      === date(2024, 3, 15, 13, 0, 0))
+    // Compound DT (36h = 1 day + 12h) across spring-forward exercises the step-forward
+    // arm of estimate-and-adjust. Origin = 2024-03-08 00:00 PST. The span from origin to
+    // candidate(3) crosses spring-forward (losing 1h), so candidate(3) lands 1 hour
+    // earlier in UTC than a UTC-linear k=3 boundary would: zone-aware c(3) = 2024-03-12
+    // 19:00 UTC, UTC-linear would predict 20:00 UTC. Linear estimate from ts=19:00 UTC
+    // gives k=2, c(2) < ts, c(3) <= ts -> step forward to k=3.
+    val dtSpringOrigin = date(2024, 3, 8, 8, 0, 0)  // 08:00 UTC = 2024-03-08 00:00 PST
+    assert(timeBucketDTInterval(36 * MICROS_PER_HOUR,
+      date(2024, 3, 12, 19, 0, 0), dtSpringOrigin, la)  // 19:00 UTC = 2024-03-12 12:00 PDT
+      === date(2024, 3, 12, 19, 0, 0))
+    // Compound DT (36h) across fall-back exercises the step-back arm. Origin =
+    // 2024-11-01 00:00 PDT. ts at 2024-11-05 19:30 UTC: linear estimate is k=3, but
+    // c(3) = `origin + INTERVAL '108' HOUR` = 2024-11-05 20:00 UTC > ts -> step back to
+    // k=2. c(2) = `origin + INTERVAL '72' HOUR` = 2024-11-04 00:00 PST.
+    val dtFallOrigin = date(2024, 11, 1, 7, 0, 0)  // 07:00 UTC = 2024-11-01 00:00 PDT
+    assert(timeBucketDTInterval(36 * MICROS_PER_HOUR,
+      date(2024, 11, 5, 19, 30, 0), dtFallOrigin, la)
+      === date(2024, 11, 4, 8, 0, 0))  // 08:00 UTC = 2024-11-04 00:00 PST
+    // Compound DT (36h) within a single non-DST month exercises the no-step arm: the
+    // linear estimate is exact, c(k) <= ts < c(k+1).
+    assert(timeBucketDTInterval(36 * MICROS_PER_HOUR,
+      date(2024, 6, 5, 18, 0, 0), date(2024, 6, 1, 7, 0, 0), la)
+      === date(2024, 6, 4, 7, 0, 0))  // k=2: 2024-06-04 00:00 PDT, no DST in span
+    // Origin AFTER ts crossing spring-forward exercises step-back in the negative-k
+    // arm. candidate(-9) = 2024-02-12 06:00 PST = 14:00 UTC > ts (13:30 UTC) -> step
+    // back to k=-10 = 2024-02-05 06:00 PST.
+    val laOriginNeg = date(2024, 4, 15, 13, 0, 0)  // 13:00 UTC = 2024-04-15 06:00 PDT
+    assert(timeBucketDTInterval(7 * MICROS_PER_DAY,
+      date(2024, 2, 12, 13, 30, 0), laOriginNeg, la)
+      === date(2024, 2, 5, 14, 0, 0))  // 14:00 UTC = 2024-02-05 06:00 PST
+    // Sub-day bucket is zone-independent: result with LA matches result with UTC.
+    assert(timeBucketDTInterval(15 * MICROS_PER_MINUTE,
+      date(2024, 3, 10, 19, 7, 0), 0L, la)
+      === timeBucketDTInterval(15 * MICROS_PER_MINUTE,
+        date(2024, 3, 10, 19, 7, 0), 0L, utc))
+    // Overflow in subtractExact (ts - origin underflows below Long.MinValue)
+    intercept[ArithmeticException] {
+      timeBucketDTInterval(1L, Long.MinValue, Long.MaxValue, utc)
+    }
+    // Overflow in subtractExact (ts - origin overflows above Long.MaxValue)
+    intercept[ArithmeticException] {
+      timeBucketDTInterval(1L, Long.MaxValue, -1L, utc)
+    }
+    // Overflow in multiplyExact (floorDiv * bucketMicros)
+    intercept[ArithmeticException] {
+      timeBucketDTInterval(3L, Long.MinValue, 0L, utc)
+    }
+    // Overflow in instantToMicros after extreme plusDays shift
+    intercept[ArithmeticException] {
+      timeBucketDTInterval(Long.MaxValue, -6L, -5L, utc)
+    }
+  }
+
+  test("timeBucketYMInterval") {
+    val utc = ZoneOffset.UTC
+    // 1-month bucket default origin
+    assert(timeBucketYMInterval(1,
+      date(2024, 3, 15, 11, 27, 0), 0L, utc) === date(2024, 3, 1, 0, 0, 0))
+    // 3-month (quarterly) bucket
+    assert(timeBucketYMInterval(3,
+      date(2024, 5, 15, 10, 0, 0), 0L, utc) === date(2024, 4, 1, 0, 0, 0))
+    // 12-month (yearly) bucket
+    assert(timeBucketYMInterval(12,
+      date(2024, 5, 15, 10, 0, 0), 0L, utc) === date(2024, 1, 1, 0, 0, 0))
+    // Monthly with origin on 15th: grid anchored at day-of-month = 15
+    assert(timeBucketYMInterval(1,
+      date(2024, 3, 20, 9, 0, 0), date(1970, 1, 15, 0, 0, 0), utc)
+      === date(2024, 3, 15, 0, 0, 0))
+    // End-of-month capping with step-back: origin on 1970-01-31, 1-month bucket.
+    // AddMonths(1970-01-31, k) caps to 2024-03-31 for large k (> ts); step back to
+    // 2024-02-29 (leap year).
+    assert(timeBucketYMInterval(1,
+      date(2024, 3, 1, 12, 0, 0), date(1970, 1, 31, 0, 0, 0), utc)
+      === date(2024, 2, 29, 0, 0, 0))
+    // Leap-year capping: origin on Feb 29, 1-year bucket, non-leap target.
+    assert(timeBucketYMInterval(12,
+      date(2025, 3, 1, 0, 0, 0), date(2024, 2, 29, 0, 0, 0), utc)
+      === date(2025, 2, 28, 0, 0, 0))
+    // Step-back without end-of-month capping: candidate(1) = Feb 15 12:00 lands later
+    // in the same calendar month as ts (Feb 10 11:00) -> step back to origin.
+    assert(timeBucketYMInterval(1,
+      date(2024, 2, 10, 11, 0, 0), date(2024, 1, 15, 12, 0, 0), utc)
+      === date(2024, 1, 15, 12, 0, 0))
+    // Pre-epoch ts
+    assert(timeBucketYMInterval(1,
+      date(1968, 7, 15, 10, 0, 0), 0L, utc) === date(1968, 7, 1, 0, 0, 0))
+    // Session-zone bucketing: in LA (UTC-8 winter / UTC-7 summer), monthly buckets land on
+    // local month-start regardless of DST. Origin = local 1970-01-01 00:00 PST = 8h UTC.
+    val la = DateTimeUtils.getZoneId("America/Los_Angeles")
+    val laOrigin = DateTimeUtils.daysToMicros(0, la)
+    // Winter ts: 2024-02-15 10:00 PST = 2024-02-15 18:00 UTC; bucket = 2024-02-01 PST.
+    assert(timeBucketYMInterval(1,
+      date(2024, 2, 15, 18, 0, 0), laOrigin, la)
+      === DateTimeUtils.daysToMicros(LocalDate.of(2024, 2, 1).toEpochDay.toInt, la))
+    // Summer ts: 2024-07-15 17:00 UTC = 2024-07-15 10:00 PDT; bucket = 2024-07-01 PDT.
+    assert(timeBucketYMInterval(1,
+      date(2024, 7, 15, 17, 0, 0), laOrigin, la)
+      === DateTimeUtils.daysToMicros(LocalDate.of(2024, 7, 1).toEpochDay.toInt, la))
+    // Fall-back fold origin: 2024-11-03 23:30 PST (in the 25-hour day after fall-back at
+    // 02:00). Origin's offset is PST (-08:00). Bucket should preserve the post-fold offset
+    // on the candidate day, matching `+ INTERVAL '1' MONTH` semantics.
+    // Origin: 2024-11-03 23:30 PST = 2024-11-04 07:30 UTC.
+    // ts:     2024-12-05 12:00 PST = 2024-12-05 20:00 UTC.
+    // Bucket: 2024-12-03 23:30 PST = 2024-12-04 07:30 UTC.
+    assert(timeBucketYMInterval(1,
+      date(2024, 12, 5, 20, 0, 0), date(2024, 11, 4, 7, 30, 0), la)
+      === date(2024, 12, 4, 7, 30, 0))
+    // Origin = ts = the SECOND occurrence of 01:30 on the fall-back day (PST, post-fold).
+    // bucket(origin, origin) must equal origin; an offset-naive impl would return PDT
+    // (1h earlier).
+    val foldOrigin = date(2024, 11, 3, 9, 30, 0)  // = 2024-11-03 01:30 PST
+    assert(timeBucketYMInterval(1, foldOrigin, foldOrigin, la) === foldOrigin)
+    // Origin Feb 3 2024 01:30 PST + 9 months -> Nov 3 2024 01:30 in fold. Origin's offset
+    // PST is valid in the fold so it is retained -> Nov 3 09:30 UTC. An offset-naive impl
+    // would pick the earlier offset PDT -> Nov 3 08:30 UTC (1h earlier).
+    assert(timeBucketYMInterval(9,
+      date(2024, 11, 4, 0, 0, 0),    // ts later than candidate
+      date(2024, 2, 3, 9, 30, 0),    // = 2024-02-03 01:30 PST
+      la) === date(2024, 11, 3, 9, 30, 0))
+    // Symmetric case: origin Aug 3 2024 01:30 PDT + 3 months -> Nov 3 2024 01:30 in fold.
+    // Origin's offset PDT is also valid in the fold (first occurrence) so it is retained
+    // -> Nov 3 08:30 UTC.
+    assert(timeBucketYMInterval(3,
+      date(2024, 11, 3, 9, 0, 0),    // ts later than candidate
+      date(2024, 8, 3, 8, 30, 0),    // = 2024-08-03 01:30 PDT
+      la) === date(2024, 11, 3, 8, 30, 0))
+    // Origin = FIRST occurrence of 01:30 on the fall-back day (PDT, pre-fold). +1 month
+    // -> local Dec 3 01:30; December has no PDT, so resolves to PST -> Dec 3 09:30 UTC.
+    // The bucket span is 30d + 1h in UTC (origin's PDT offset is dropped crossing into a
+    // month where PDT does not exist).
+    assert(timeBucketYMInterval(1,
+      date(2024, 12, 5, 20, 0, 0),   // ts: 2024-12-05 12:00 PST
+      date(2024, 11, 3, 8, 30, 0),   // origin: 2024-11-03 01:30 PDT
+      la) === date(2024, 12, 3, 9, 30, 0))
+    // Origin = SECOND occurrence of 01:30 on the fall-back day (PST, post-fold). +1 month
+    // -> Dec 3 01:30 PST = 09:30 UTC. Same candidate instant as the previous test, but
+    // origin is 1h later in UTC, so the bucket spans 30d in UTC.
+    assert(timeBucketYMInterval(1,
+      date(2024, 12, 5, 20, 0, 0),   // ts
+      date(2024, 11, 3, 9, 30, 0),   // origin: 2024-11-03 01:30 PST
+      la) === date(2024, 12, 3, 9, 30, 0))
+    // Extreme ts: instantToMicros overflow on the converted bucket boundary.
+    intercept[ArithmeticException] {
+      timeBucketYMInterval(1, Long.MinValue, 0L, utc)
+    }
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
index fece3e36bc593..bca033d0673da 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.catalyst.util;
 
 import org.apache.spark.SparkIllegalArgumentException;
-import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.SparkRuntimeException;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.junit.jupiter.api.Test;
 
 import java.nio.ByteBuffer;
@@ -56,7 +57,7 @@ void testFromBytes() {
 
   @Test
   void testFromValue() {
-    GeographyVal value = GeographyVal.fromBytes(testGeographyVal);
+    BinaryView value = BinaryView.fromBytes(testGeographyVal);
     Geography geography = Geography.fromValue(value);
     assertNotNull(geography);
     assertEquals(value, geography.getValue());
@@ -101,7 +102,7 @@ void testFromWkbWithSridRudimentary() {
     byte[] wkb = getTestWKBPoint();
     Geography geography = Geography.fromWkb(wkb, 4326);
     assertNotNull(geography);
-    assertArrayEquals(wkb, geography.toWkb());
+    assertArrayEquals(wkb, geography.toWkb(ByteOrder.LITTLE_ENDIAN));
     assertEquals(4326, geography.srid());
   }
 
@@ -110,7 +111,7 @@ void testFromWkbNoSridRudimentary() {
     byte[] wkb = getTestWKBPoint();
     Geography geography = Geography.fromWkb(wkb);
     assertNotNull(geography);
-    assertArrayEquals(wkb, geography.toWkb());
+    assertArrayEquals(wkb, geography.toWkb(ByteOrder.LITTLE_ENDIAN));
     assertEquals(4326, geography.srid());
   }
 
@@ -178,7 +179,7 @@ void testToWkb() {
     Geography geography = Geography.fromBytes(testGeographyVal);
     // WKB value (endianness: NDR) corresponding to WKT: POINT(1 2).
     byte[] wkb = HexFormat.of().parseHex("0101000000000000000000f03f0000000000000040");
-    assertArrayEquals(wkb, geography.toWkb());
+    assertArrayEquals(wkb, geography.toWkb(ByteOrder.LITTLE_ENDIAN));
   }
 
   @Test
@@ -250,4 +251,33 @@ void testSrid() {
     Geography geography = Geography.fromBytes(testGeographyVal);
     assertEquals(4326, geography.srid());
   }
+
+  @Test
+  void testSetSridOnTightOwner() {
+    // fromBytes wraps a tight on-heap array, so setSrid writes through in place.
+    Geography geography = Geography.fromBytes(testGeographyVal.clone());
+    geography.setSrid(4269);
+    assertEquals(4269, geography.srid());
+  }
+
+  @Test
+  void testSetSridThrowsWhenNotTightOwner() {
+    // A sub-range view does not own a tight backing array, so getBytes() returns a copy and an
+    // in-place setSrid would be silently lost. It must fail loudly instead of dropping the write.
+    byte[] padded = new byte[testGeographyVal.length + 4];
+    System.arraycopy(testGeographyVal, 0, padded, 4, testGeographyVal.length);
+    Geography geography = Geography.fromValue(
+      BinaryView.fromBytes(padded, 4, testGeographyVal.length));
+    // Reads still work (they copy out), and the original SRID is intact.
+    assertEquals(4326, geography.srid());
+    SparkRuntimeException e = assertThrows(
+      SparkRuntimeException.class, () -> geography.setSrid(4269));
+    assertEquals("INTERNAL_ERROR", e.getCondition());
+    // After copy() the value owns a tight array, so setSrid succeeds and writes through.
+    Geography owned = geography.copy();
+    owned.setSrid(4269);
+    assertEquals(4269, owned.srid());
+    // The original sub-range view is untouched.
+    assertEquals(4326, geography.srid());
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
index 5d0b11e969ad3..4cd8f11dc66aa 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.catalyst.util;
 
 import org.apache.spark.SparkIllegalArgumentException;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.SparkRuntimeException;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.junit.jupiter.api.Test;
 
 import java.nio.ByteBuffer;
@@ -67,7 +68,7 @@ void testFromBytes() {
 
   @Test
   void testFromValue() {
-    GeometryVal value = GeometryVal.fromBytes(testGeometryVal);
+    BinaryView value = BinaryView.fromBytes(testGeometryVal);
     Geometry geometry = Geometry.fromValue(value);
     assertNotNull(geometry);
     assertEquals(value, geometry.getValue());
@@ -113,7 +114,7 @@ void testFromWkbWithSridRudimentary() {
     byte[] wkb = getTestWKBPoint();
     Geometry geometry = Geometry.fromWkb(wkb, 4326);
     assertNotNull(geometry);
-    assertArrayEquals(wkb, geometry.toWkb());
+    assertArrayEquals(wkb, geometry.toWkb(ByteOrder.LITTLE_ENDIAN));
     assertEquals(4326, geometry.srid());
   }
 
@@ -124,7 +125,7 @@ void testFromWkbNoSridRudimentary() {
     // Once we implement the appropriate parsing logic, this test should be updated accordingly.
     Geometry geometry = Geometry.fromWkb(wkb);
     assertNotNull(geometry);
-    assertArrayEquals(wkb, geometry.toWkb());
+    assertArrayEquals(wkb, geometry.toWkb(ByteOrder.LITTLE_ENDIAN));
     assertEquals(0, geometry.srid());
   }
 
@@ -192,7 +193,7 @@ void testToWkb() {
     Geometry geometry = Geometry.fromBytes(testGeometryVal);
     // WKB value (endianness: NDR) corresponding to WKT: POINT(1 2).
     byte[] wkb = HexFormat.of().parseHex("0101000000000000000000f03f0000000000000040");
-    assertArrayEquals(wkb, geometry.toWkb());
+    assertArrayEquals(wkb, geometry.toWkb(ByteOrder.LITTLE_ENDIAN));
   }
 
   @Test
@@ -270,4 +271,33 @@ void testSrid() {
     Geometry geometry = Geometry.fromBytes(testGeometryVal);
     assertEquals(4326, geometry.srid());
   }
+
+  @Test
+  void testSetSridOnTightOwner() {
+    // fromBytes wraps a tight on-heap array, so setSrid writes through in place.
+    Geometry geometry = Geometry.fromBytes(testGeometryVal.clone());
+    geometry.setSrid(3857);
+    assertEquals(3857, geometry.srid());
+  }
+
+  @Test
+  void testSetSridThrowsWhenNotTightOwner() {
+    // A sub-range view does not own a tight backing array, so getBytes() returns a copy and an
+    // in-place setSrid would be silently lost. It must fail loudly instead of dropping the write.
+    byte[] padded = new byte[testGeometryVal.length + 4];
+    System.arraycopy(testGeometryVal, 0, padded, 4, testGeometryVal.length);
+    Geometry geometry = Geometry.fromValue(
+      BinaryView.fromBytes(padded, 4, testGeometryVal.length));
+    // Reads still work (they copy out), and the original SRID is intact.
+    assertEquals(4326, geometry.srid());
+    SparkRuntimeException e = assertThrows(
+      SparkRuntimeException.class, () -> geometry.setSrid(3857));
+    assertEquals("INTERNAL_ERROR", e.getCondition());
+    // After copy() the value owns a tight array, so setSrid succeeds and writes through.
+    Geometry owned = geometry.copy();
+    owned.setSrid(3857);
+    assertEquals(3857, owned.srid());
+    // The original sub-range view is untouched.
+    assertEquals(4326, geometry.srid());
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
index aa1b4735cf62f..ad5f5e9664b73 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -18,8 +18,8 @@
 package org.apache.spark.sql.catalyst.util;
 
 import org.apache.spark.SparkIllegalArgumentException;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
+import org.apache.spark.unsafe.types.UTF8String;
 import org.junit.jupiter.api.Test;
 
 import java.nio.ByteBuffer;
@@ -38,6 +38,8 @@ class STUtilsSuite {
   private final byte[] testWkb = new byte[] {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, (byte)0xF0, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40};
 
+  private final UTF8String ENDIANNESS_NDR = UTF8String.fromString("NDR");
+
   // A sample Geography byte array for testing purposes, representing a POINT(1 2) with SRID 4326.
   private final int testGeographySrid = 4326;
   private final byte[] testGeographyBytes;
@@ -73,14 +75,14 @@ class STUtilsSuite {
 
   @Test
   void testGeometryToGeography() {
-    GeometryVal geometryVal = GeometryVal.fromBytes(testGeometry4326Bytes);
-    GeographyVal geographyVal = STUtils.geometryToGeography(geometryVal);
+    BinaryView geometryVal = BinaryView.fromBytes(testGeometry4326Bytes);
+    BinaryView geographyVal = STUtils.geometryToGeography(geometryVal);
     assertNotNull(geographyVal);
     assertArrayEquals(geometryVal.getBytes(), geographyVal.getBytes());
     // Non-geographic SRID should not be allowed for geometry to geography casting.
     SparkIllegalArgumentException sridException = assertThrows(
       SparkIllegalArgumentException.class,
-      () -> STUtils.geometryToGeography(GeometryVal.fromBytes(testGeometryBytes)));
+      () -> STUtils.geometryToGeography(BinaryView.fromBytes(testGeometryBytes)));
     assertEquals("ST_INVALID_SRID_VALUE", sridException.getCondition());
     // Coordinates outside geography bounds should not be allowed even with a valid SRID.
     ByteBuffer oobWkbBuf = ByteBuffer.allocate(21).order(ByteOrder.LITTLE_ENDIAN);
@@ -93,14 +95,14 @@ void testGeometryToGeography() {
     System.arraycopy(oobWkb, 0, oobGeomBytes, sridLen, oobWkb.length);
     SparkIllegalArgumentException coordinateException = assertThrows(
       SparkIllegalArgumentException.class,
-      () -> STUtils.geometryToGeography(GeometryVal.fromBytes(oobGeomBytes)));
+      () -> STUtils.geometryToGeography(BinaryView.fromBytes(oobGeomBytes)));
     assertEquals("WKB_PARSE_ERROR", coordinateException.getCondition());
   }
 
   @Test
   void testGeographyToGeometry() {
-    GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
-    GeometryVal geometryVal = STUtils.geographyToGeometry(geographyVal);
+    BinaryView geographyVal = BinaryView.fromBytes(testGeographyBytes);
+    BinaryView geometryVal = STUtils.geographyToGeometry(geographyVal);
     assertNotNull(geometryVal);
     assertArrayEquals(geographyVal.getBytes(), geometryVal.getBytes());
   }
@@ -110,16 +112,16 @@ void testGeographyToGeometry() {
   // ST_AsBinary
   @Test
   void testStAsBinaryGeography() {
-    GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
-    byte[] geographyWkb = STUtils.stAsBinary(geographyVal);
+    BinaryView geographyVal = BinaryView.fromBytes(testGeographyBytes);
+    byte[] geographyWkb = STUtils.stGeogAsBinary(geographyVal, ENDIANNESS_NDR);
     assertNotNull(geographyWkb);
     assertArrayEquals(testWkb, geographyWkb);
   }
 
   @Test
   void testStAsBinaryGeometry() {
-    GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
-    byte[] geometryWkb = STUtils.stAsBinary(geometryVal);
+    BinaryView geometryVal = BinaryView.fromBytes(testGeometryBytes);
+    byte[] geometryWkb = STUtils.stGeomAsBinary(geometryVal, ENDIANNESS_NDR);
     assertNotNull(geometryWkb);
     assertArrayEquals(testWkb, geometryWkb);
   }
@@ -127,35 +129,67 @@ void testStAsBinaryGeometry() {
   // ST_AsEWKT
   @Test
   void testStAsEwktGeography() {
-    GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
-    assertEquals("SRID=4326;POINT(1 2)", STUtils.stAsEwkt(geographyVal).toString());
+    BinaryView geographyVal = BinaryView.fromBytes(testGeographyBytes);
+    assertEquals("SRID=4326;POINT(1 2)", STUtils.stGeogAsEwkt(geographyVal).toString());
   }
 
   @Test
   void testStAsEwktGeometry() {
-    GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
-    assertEquals("POINT(1 2)", STUtils.stAsEwkt(geometryVal).toString());
+    BinaryView geometryVal = BinaryView.fromBytes(testGeometryBytes);
+    assertEquals("POINT(1 2)", STUtils.stGeomAsEwkt(geometryVal).toString());
   }
 
   // ST_GeogFromWKB
   @Test
-  void testStGeogFromWKB() {
-    GeographyVal geographyVal = STUtils.stGeogFromWKB(testWkb);
+  void testStGeogFromWKBNoSrid() {
+    BinaryView geographyVal = STUtils.stGeogFromWKB(testWkb);
     assertNotNull(geographyVal);
     assertArrayEquals(testGeographyBytes, geographyVal.getBytes());
   }
 
+  @Test
+  void testStGeogFromWKBWithDefaultSrid() {
+    BinaryView geographyVal = STUtils.stGeogFromWKB(testWkb, testGeographySrid);
+    assertNotNull(geographyVal);
+    assertArrayEquals(testGeographyBytes, geographyVal.getBytes());
+  }
+
+  @Test
+  void testStGeogFromWKBWithValidSrid() {
+    // Geography supports a variety of geographic SRIDs (not just the default 4326).
+    for (int validGeographySrid : new int[]{4267, 4269, 4326, 4612, 37001, 104030}) {
+      BinaryView geographyVal = STUtils.stGeogFromWKB(testWkb, validGeographySrid);
+      assertNotNull(geographyVal);
+      byte[] expectedBytes = new byte[testWkb.length + sridLen];
+      byte[] geogSrid = ByteBuffer.allocate(sridLen).order(end).putInt(validGeographySrid).array();
+      System.arraycopy(geogSrid, 0, expectedBytes, 0, sridLen);
+      System.arraycopy(testWkb, 0, expectedBytes, sridLen, testWkb.length);
+      assertArrayEquals(expectedBytes, geographyVal.getBytes());
+    }
+  }
+
+  @Test
+  void testStGeogFromWKBWithInvalidSrid() {
+    // SRIDs that are either out of range or correspond to non-geographic SRSes (e.g. 0, 3857).
+    for (int invalidGeographySrid : new int[]{-9999, -2, -1, 0, 1, 2, 3857, 9999}) {
+      SparkIllegalArgumentException exception = assertThrows(SparkIllegalArgumentException.class,
+              () -> STUtils.stGeogFromWKB(testWkb, invalidGeographySrid));
+      assertEquals("ST_INVALID_SRID_VALUE", exception.getCondition());
+      assertTrue(exception.getMessage().contains("value: " + invalidGeographySrid + "."));
+    }
+  }
+
   // ST_GeomFromWKB
   @Test
   void testStGeomFromWKBNoSrid() {
-    GeometryVal geometryVal = STUtils.stGeomFromWKB(testWkb);
+    BinaryView geometryVal = STUtils.stGeomFromWKB(testWkb);
     assertNotNull(geometryVal);
     assertArrayEquals(testGeometryBytes, geometryVal.getBytes());
   }
 
   @Test
   void testStGeomFromWKBWithDefaultSrid() {
-    GeometryVal geometryVal = STUtils.stGeomFromWKB(testWkb, testGeometrySrid);
+    BinaryView geometryVal = STUtils.stGeomFromWKB(testWkb, testGeometrySrid);
     assertNotNull(geometryVal);
     assertArrayEquals(testGeometryBytes, geometryVal.getBytes());
   }
@@ -163,7 +197,7 @@ void testStGeomFromWKBWithDefaultSrid() {
   @Test
   void testStGeomFromWKBWithValidSrid() {
     int srid = 4326;
-    GeometryVal geometryVal = STUtils.stGeomFromWKB(testWkb, srid);
+    BinaryView geometryVal = STUtils.stGeomFromWKB(testWkb, srid);
     assertNotNull(geometryVal);
     byte[] testGeometryBytes = new byte[testWkb.length + sridLen];
     byte[] geomSrid = ByteBuffer.allocate(sridLen).order(end).putInt(srid).array();
@@ -185,22 +219,22 @@ void testStGeomFromWKBWithInvalidSrid() {
   // ST_Srid
   @Test
   void testStSridGeography() {
-    GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
-    assertEquals(testGeographySrid, STUtils.stSrid(geographyVal));
+    BinaryView geographyVal = BinaryView.fromBytes(testGeographyBytes);
+    assertEquals(testGeographySrid, STUtils.stGeogSrid(geographyVal));
   }
 
   @Test
   void testStSridGeometry() {
-    GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
-    assertEquals(testGeometrySrid, STUtils.stSrid(geometryVal));
+    BinaryView geometryVal = BinaryView.fromBytes(testGeometryBytes);
+    assertEquals(testGeometrySrid, STUtils.stGeomSrid(geometryVal));
   }
 
   // ST_SetSrid
   @Test
   void testStSetSridGeography() {
     for (int validGeographySrid : new int[]{4326}) {
-      GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
-      GeographyVal updatedGeographyVal = STUtils.stSetSrid(geographyVal, validGeographySrid);
+      BinaryView geographyVal = BinaryView.fromBytes(testGeographyBytes);
+      BinaryView updatedGeographyVal = STUtils.stGeogSetSrid(geographyVal, validGeographySrid);
       assertNotNull(updatedGeographyVal);
       Geography updatedGeography = Geography.fromBytes(updatedGeographyVal.getBytes());
       assertEquals(validGeographySrid, updatedGeography.srid());
@@ -210,9 +244,9 @@ void testStSetSridGeography() {
   @Test
   void testStSetSridGeographyInvalidSrid() {
     for (int invalidGeographySrid : new int[]{-9999, -2, -1, 0, 1, 2, 3857, 9999}) {
-      GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
+      BinaryView geographyVal = BinaryView.fromBytes(testGeographyBytes);
       SparkIllegalArgumentException exception = assertThrows(SparkIllegalArgumentException.class,
-        () -> STUtils.stSetSrid(geographyVal, invalidGeographySrid));
+        () -> STUtils.stGeogSetSrid(geographyVal, invalidGeographySrid));
       assertEquals("ST_INVALID_SRID_VALUE", exception.getCondition());
       assertTrue(exception.getMessage().contains("value: " + invalidGeographySrid + "."));
     }
@@ -221,8 +255,8 @@ void testStSetSridGeographyInvalidSrid() {
   @Test
   void testStSetSridGeometry() {
     for (int validGeographySrid : new int[]{0, 3857, 4326}) {
-      GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
-      GeometryVal updatedGeometryVal = STUtils.stSetSrid(geometryVal, validGeographySrid);
+      BinaryView geometryVal = BinaryView.fromBytes(testGeometryBytes);
+      BinaryView updatedGeometryVal = STUtils.stGeomSetSrid(geometryVal, validGeographySrid);
       assertNotNull(updatedGeometryVal);
       Geometry updatedGeometry = Geometry.fromBytes(updatedGeometryVal.getBytes());
       assertEquals(validGeographySrid, updatedGeometry.srid());
@@ -232,9 +266,9 @@ void testStSetSridGeometry() {
   @Test
   void testStSetSridGeometryInvalidSrid() {
     for (int invalidGeometrySrid : new int[]{-9999, -2, -1, 1, 2, 9999}) {
-      GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
+      BinaryView geometryVal = BinaryView.fromBytes(testGeometryBytes);
       SparkIllegalArgumentException exception = assertThrows(SparkIllegalArgumentException.class,
-        () -> STUtils.stSetSrid(geometryVal, invalidGeometrySrid));
+        () -> STUtils.stGeomSetSrid(geometryVal, invalidGeometrySrid));
       assertEquals("ST_INVALID_SRID_VALUE", exception.getCondition());
       assertTrue(exception.getMessage().contains("value: " + invalidGeometrySrid + "."));
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala
index 77ae1a96ac45a..470ace2bcdcd7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala
@@ -287,5 +287,68 @@ class StringUtilsSuite extends SparkFunSuite with SQLHelper {
           |END""".stripMargin
       )
     )
+
+    // SPARK-54876: statement after semicolon ending with block comment should not be dropped
+    assert(
+      splitSemiColonWithIndex(
+        "SELECT 1; SELECT 2 /* comment */",
+        enableSqlScripting = false) == Seq("SELECT 1", " SELECT 2 /* comment */")
+    )
+
+    // SPARK-54876: line comment followed by block comment should produce empty result
+    assert(
+      splitSemiColonWithIndex(
+        "-- foo\n/* bar */",
+        enableSqlScripting = false) == Seq()
+    )
+
+    // SPARK-54876: line comment before block comment after semicolon
+    assert(
+      splitSemiColonWithIndex(
+        "SELECT 1; -- foo\n /* bar */",
+        enableSqlScripting = false) == Seq("SELECT 1")
+    )
+
+    // SPARK-54876: nested block comments
+    assert(
+      splitSemiColonWithIndex(
+        "SELECT 1; /* outer /* inner */ */",
+        enableSqlScripting = false) == Seq("SELECT 1")
+    )
+
+    // SPARK-54876: preceding closed block comment + line comment (no SQL statement)
+    assert(
+      splitSemiColonWithIndex(
+        "/* a */ -- foo\n/* b */",
+        enableSqlScripting = false) == Seq()
+    )
+
+    // SPARK-54876: unterminated block comment at EOF
+    assert(
+      splitSemiColonWithIndex(
+        "SELECT 1; /* unterminated",
+        enableSqlScripting = false) == Seq("SELECT 1", " /* unterminated")
+    )
+
+    // SPARK-54876: unterminated string literal (single input, no semicolon)
+    assert(
+      splitSemiColonWithIndex(
+        "'unterminated",
+        enableSqlScripting = false) == Seq("'unterminated")
+    )
+
+    // SPARK-54876: unterminated string literal after semicolon
+    assert(
+      splitSemiColonWithIndex(
+        "SELECT 1; 'unterminated string",
+        enableSqlScripting = false) == Seq("SELECT 1", " 'unterminated string")
+    )
+
+    // SPARK-54876: unterminated block comment (single input, no semicolon)
+    assert(
+      splitSemiColonWithIndex(
+        "/* only a comment that never closes",
+        enableSqlScripting = false) == Seq("/* only a comment that never closes")
+    )
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala
new file mode 100644
index 0000000000000..f8e3224fa7e12
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.concurrent.ConcurrentHashMap
+
+/**
+ * An InMemoryTableCatalog that simulates a caching connector like
+ * Iceberg's CachingCatalog. On first [[loadTable]], returns a fresh
+ * copy. On subsequent loads, returns the CACHED (stale) copy,
+ * making external changes invisible.
+ *
+ * Session writes go through the write-variant [[loadTable]], which is not
+ * cached, so they modify the underlying table directly. Cached [[loadTable]]
+ * results may still be stale until [[clearCache]] or REFRESH TABLE (which
+ * invokes [[invalidateTable]]) is called.
+ *
+ * Only the primary [[loadTable(ident:org\.apache\.spark\.sql\.connector\.catalog\.Identifier)*]]
+ * overload is cached. Version and timestamp overloads bypass the cache, matching
+ * time-travel semantics. [[dropTable]], [[createTable]], and [[alterTable]] do not
+ * invalidate the cache, matching the behavior of real caching connectors.
+ */
+class CachingInMemoryTableCatalog extends InMemoryTableCatalog {
+  private val cachedTables = new ConcurrentHashMap[Identifier, Table]()
+
+  override def loadTable(ident: Identifier): Table =
+    cachedTables.computeIfAbsent(ident, _ => super.loadTable(ident))
+
+  override def invalidateTable(ident: Identifier): Unit = {
+    super.invalidateTable(ident)
+    cachedTables.remove(ident)
+  }
+
+  def clearCache(): Unit = cachedTables.clear()
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala
index fc78eef0ff1b8..199e43d39bbe1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala
@@ -22,9 +22,11 @@ import java.net.URI
 import scala.jdk.CollectionConverters._
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, FakeV2SessionCatalog, NoSuchNamespaceException}
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog => V1InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.connector.catalog.CatalogManager.{CurrentSchemaEntry, LiteralPathEntry}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
@@ -39,7 +41,7 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("CatalogManager should reflect the changes of default catalog") {
-    val catalogManager = new CatalogManager(FakeV2SessionCatalog, createSessionCatalog())
+    val catalogManager = new DefaultCatalogManager(FakeV2SessionCatalog, createSessionCatalog())
     assert(catalogManager.currentCatalog.name() == CatalogManager.SESSION_CATALOG_NAME)
     assert(catalogManager.currentNamespace.sameElements(Array("default")))
 
@@ -52,7 +54,7 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("CatalogManager should keep the current catalog once set") {
-    val catalogManager = new CatalogManager(FakeV2SessionCatalog, createSessionCatalog())
+    val catalogManager = new DefaultCatalogManager(FakeV2SessionCatalog, createSessionCatalog())
     assert(catalogManager.currentCatalog.name() == CatalogManager.SESSION_CATALOG_NAME)
     withSQLConf("spark.sql.catalog.dummy" -> classOf[DummyCatalog].getName) {
       catalogManager.setCurrentCatalog("dummy")
@@ -68,7 +70,7 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("current namespace should be updated when switching current catalog") {
-    val catalogManager = new CatalogManager(FakeV2SessionCatalog, createSessionCatalog())
+    val catalogManager = new DefaultCatalogManager(FakeV2SessionCatalog, createSessionCatalog())
     withSQLConf("spark.sql.catalog.dummy" -> classOf[DummyCatalog].getName) {
       catalogManager.setCurrentCatalog("dummy")
       assert(catalogManager.currentNamespace.sameElements(Array("a", "b")))
@@ -93,7 +95,7 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper {
       CatalogDatabase(
         "test", "", v1SessionCatalog.getDefaultDBPath("test"), Map.empty),
       ignoreIfExists = false)
-    val catalogManager = new CatalogManager(FakeV2SessionCatalog, v1SessionCatalog)
+    val catalogManager = new DefaultCatalogManager(FakeV2SessionCatalog, v1SessionCatalog)
 
     // If the current catalog is session catalog, setting current namespace actually sets
     // `SessionCatalog.currentDb`.
@@ -127,6 +129,138 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper {
       }
     }
   }
+
+  test("deserializePathEntries parses valid payloads") {
+    val stored =
+      """[["spark_catalog","default"],["system","builtin"],["spark_catalog","db1","ns1"]]"""
+    assert(CatalogManager.deserializePathEntries(stored).contains(Seq(
+      Seq("spark_catalog", "default"),
+      Seq("system", "builtin"),
+      Seq("spark_catalog", "db1", "ns1"))))
+    assert(CatalogManager.deserializePathEntries("[]").contains(Seq.empty))
+  }
+
+  test("deserializePathEntries returns None for malformed payloads") {
+    val malformedPayloads = Seq(
+      "",
+      "not_json",
+      "{}",
+      """["spark_catalog"]""",
+      """[["spark_catalog"], 1]""",
+      """[[1]]""")
+    malformedPayloads.foreach { payload =>
+      assert(CatalogManager.deserializePathEntries(payload).isEmpty, s"payload=$payload")
+    }
+  }
+
+  test("serializePathEntries round-trips through deserialize for typical inputs") {
+    val cases = Seq(
+      Seq(Seq("spark_catalog", "default"), Seq("system", "builtin")),
+      Seq(Seq("system", "session")),
+      Seq.empty[Seq[String]])
+    cases.foreach { entries =>
+      val payload = CatalogManager.serializePathEntries(entries)
+      val parsed = CatalogManager.deserializePathEntries(payload)
+        .getOrElse(fail(s"Expected payload to round-trip: $payload"))
+      assert(parsed === entries, s"Round-trip mismatch for $entries; got $parsed")
+    }
+  }
+
+  test("serializePathEntries round-trips multi-level and quoted identifiers") {
+    val entries = Seq(
+      Seq("cat", "ns1", "ns2"),
+      Seq("spark_catalog", "sch.with.dots"),
+      Seq("spark_catalog", "schema with spaces"))
+    val payload = CatalogManager.serializePathEntries(entries)
+    val parsed = CatalogManager.deserializePathEntries(payload)
+      .getOrElse(fail(s"Expected payload to round-trip: $payload"))
+    assert(parsed === entries)
+  }
+
+  test("deserializePathEntriesOrFail raises a clear AnalysisException for bad payloads") {
+    val e = intercept[AnalysisException] {
+      CatalogManager.deserializePathEntriesOrFail(
+        storedPathStr = "{bad-json",
+        objectType = "view",
+        objectName = "default.v_broken")
+    }
+    assert(e.getMessage.contains("Invalid stored SQL path metadata for view"))
+    assert(e.getMessage.contains("default.v_broken"))
+  }
+
+  // ---------------------------------------------------------------------------
+  // Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The end-to-end
+  // `SetPathSuite` exercises this via SQL, but the duplicate-detection rules
+  // (literal-vs-literal, current_schema-vs-current_schema, case-sensitivity) are pure
+  // data and benefit from focused tests close to the implementation.
+  // ---------------------------------------------------------------------------
+
+  private def literalEntry(parts: String*): LiteralPathEntry = LiteralPathEntry(parts.toSeq)
+
+  test("validateNoStaticDuplicates: no duplicates returns the input unchanged") {
+    val entries = Seq(
+      literalEntry("spark_catalog", "default"),
+      literalEntry("system", "builtin"),
+      CurrentSchemaEntry)
+    assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries)
+  }
+
+  test("validateNoStaticDuplicates: duplicate literal under case-insensitive collation") {
+    val entries = Seq(
+      literalEntry("spark_catalog", "default"),
+      literalEntry("Spark_Catalog", "DEFAULT"))
+    val e = intercept[AnalysisException] {
+      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
+    }
+    assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY")
+    assert(e.getMessageParameters.get("pathEntry") == "Spark_Catalog.DEFAULT")
+  }
+
+  test("validateNoStaticDuplicates: case-sensitive mode keeps differently cased entries") {
+    val entries = Seq(
+      literalEntry("spark_catalog", "DEFAULT"),
+      literalEntry("spark_catalog", "default"))
+    assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = true) === entries)
+  }
+
+  test("validateNoStaticDuplicates: repeated CurrentSchemaEntry is rejected") {
+    val entries = Seq(CurrentSchemaEntry, CurrentSchemaEntry)
+    val e = intercept[AnalysisException] {
+      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
+    }
+    assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY")
+    assert(e.getMessageParameters.get("pathEntry") == "current_schema")
+  }
+
+  test("validateNoStaticDuplicates: literal-vs-CurrentSchemaEntry collision is tolerated") {
+    // The CurrentSchemaEntry marker resolves dynamically against USE SCHEMA, so a literal
+    // that happens to match the live current schema is intentionally not flagged here.
+    val entries = Seq(
+      literalEntry("spark_catalog", "default"),
+      CurrentSchemaEntry,
+      literalEntry("system", "builtin"))
+    assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries)
+  }
+
+  test("validateNoStaticDuplicates: identifier containing a dot is quoted in the error") {
+    val entries = Seq(
+      literalEntry("spark_catalog", "weird.schema"),
+      literalEntry("spark_catalog", "weird.schema"))
+    val e = intercept[AnalysisException] {
+      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
+    }
+    assert(e.getMessageParameters.get("pathEntry") == "spark_catalog.`weird.schema`")
+  }
+
+  test("validateNoStaticDuplicates: multi-level namespace duplicate is flagged") {
+    val entries = Seq(
+      literalEntry("cat", "db", "ns"),
+      literalEntry("cat", "db", "ns"))
+    val e = intercept[AnalysisException] {
+      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
+    }
+    assert(e.getMessageParameters.get("pathEntry") == "cat.db.ns")
+  }
 }
 
 class DummyCatalog extends CatalogPlugin {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/ComposedColumnIdTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/ComposedColumnIdTableCatalog.scala
new file mode 100644
index 0000000000000..64488a76db7f3
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/ComposedColumnIdTableCatalog.scala
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.Locale
+import java.util.concurrent.ConcurrentHashMap
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.internal.connector.ColumnImpl
+import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
+
+/**
+ * An [[InMemoryTableCatalog]] that tracks IDs at every nesting level
+ * (struct fields, array elements, map keys/values) and encodes the full
+ * subtree into each top-level [[Column.id]] string.
+ *
+ * This demonstrates how a connector that wants to detect nested changes
+ * can encode nested IDs into the top-level [[Column.id]] string.
+ * Any nested change (drop+re-add a struct field, etc.) produces a
+ * different encoded top-level string, so [[V2TableUtil.validateColumnIds]]
+ * detects it without Spark needing to traverse below the top level.
+ *
+ * Nested positions are keyed by ordinal path (`Seq[Int]`), not by field
+ * name. This matches Delta/Iceberg semantics where rename preserves the
+ * column ID: a renamed field stays at the same ordinal position, so the
+ * composed string is unchanged and schema validation catches the rename
+ * via the differing [[StructType]].
+ *
+ * Example: for a column `person STRUCT<name: STRING, age: INT>` with
+ * root ID 5 and nested field IDs position 0 (name) = 10,
+ * position 1 (age) = 11, the composed [[Column.id]] string is
+ * `"5[0:10,1:11]"`. If `age` is dropped and re-added, the new age gets
+ * ID 12, producing `"5[0:10,1:12]"`. Spark sees different strings and
+ * fires `COLUMN_ID_MISMATCH`.
+ */
+class ComposedColumnIdTableCatalog extends InMemoryTableCatalog {
+
+  // Per-table nested ID maps.
+  // Structure: tableIdentifier -> (columnName -> nestedFieldIdMap)
+  // where nestedFieldIdMap maps an ordinal path to its assigned ID.
+  //
+  // For column `person STRUCT<name: STRING, addr: STRUCT<city: STRING>>`:
+  //   "person" -> {
+  //     Seq(0) -> 10,       // name
+  //     Seq(1) -> 11,       // addr
+  //     Seq(1, 0) -> 12     // addr.city
+  //   }
+  private val nestedIdMaps =
+    new ConcurrentHashMap[Identifier, mutable.Map[String, mutable.Map[Seq[Int], Long]]]()
+
+  // Bare (uncomposed) root IDs, tracked separately to avoid double-encoding.
+  // Structure: tableIdentifier -> (columnName -> bareRootIdString)
+  private val rootIds =
+    new ConcurrentHashMap[Identifier, mutable.Map[String, String]]()
+
+  override def createTable(
+      ident: Identifier, info: TableInfo): Table = {
+    val table = super.createTable(ident, info).asInstanceOf[InMemoryTable]
+    val allColumnNestedIds = mutable.Map[String, mutable.Map[Seq[Int], Long]]()
+    val allRootIds = mutable.Map[String, String]()
+
+    val composedColumns: Array[Column] = table.columns().map { column =>
+      val nestedFieldIds = mutable.Map[Seq[Int], Long]()
+      assignNestedIds(column.dataType(), parentPath = Seq.empty, nestedFieldIds)
+      val columnName = column.name().toLowerCase(Locale.ROOT)
+      allColumnNestedIds(columnName) = nestedFieldIds
+      allRootIds(columnName) = column.id()
+      val composedId = encodeComposedId(column.id(), nestedFieldIds)
+      column.asInstanceOf[ColumnImpl].copy(id = composedId): Column
+    }
+
+    nestedIdMaps.put(ident, allColumnNestedIds)
+    rootIds.put(ident, allRootIds)
+
+    val composedTable = new InMemoryTable(
+      table.name,
+      composedColumns,
+      table.partitioning,
+      table.properties,
+      table.constraints,
+      id = table.id)
+    composedTable.alterTableWithData(table.data, table.schema)
+    composedTable.setVersionAndValidatedVersionFrom(table)
+    tables.put(ident, composedTable)
+    composedTable
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    val oldTable = loadTable(ident).asInstanceOf[InMemoryTable]
+    val oldColumnNestedIds = Option(nestedIdMaps.get(ident))
+      .getOrElse(mutable.Map[String, mutable.Map[Seq[Int], Long]]())
+    val oldRootIds = Option(rootIds.get(ident))
+      .getOrElse(mutable.Map[String, String]())
+
+    val alteredTable = super.alterTable(ident, changes: _*).asInstanceOf[InMemoryTable]
+
+    val allColumnNestedIds = mutable.Map[String, mutable.Map[Seq[Int], Long]]()
+    val allRootIds = mutable.Map[String, String]()
+    val composedColumns: Array[Column] = alteredTable.columns().map { newColumn =>
+      val columnName = newColumn.name().toLowerCase(Locale.ROOT)
+      val oldNestedFieldIds =
+        oldColumnNestedIds.getOrElse(columnName, mutable.Map[Seq[Int], Long]())
+
+      // Find the old column to compare data types for merging nested IDs
+      val oldColumnOpt = oldTable.columns()
+        .find(oldCol => oldCol.name().toLowerCase(Locale.ROOT) == columnName)
+
+      val newNestedFieldIds = oldColumnOpt match {
+        case Some(oldColumn) =>
+          // Column existed before: preserve IDs for positions that still exist,
+          // assign fresh IDs for new positions (e.g. a re-added nested field)
+          mergeNestedIds(oldNestedFieldIds, oldColumn.dataType(), newColumn.dataType())
+        case None =>
+          // Brand new column: assign fresh IDs to all nested positions
+          val freshIds = mutable.Map[Seq[Int], Long]()
+          assignNestedIds(newColumn.dataType(), parentPath = Seq.empty, freshIds)
+          freshIds
+      }
+
+      allColumnNestedIds(columnName) = newNestedFieldIds
+
+      // super.alterTable preserves IDs by name, so newColumn.id() is
+      // the previously composed string (e.g. "5[0:10,1:11]"). Passing
+      // that to encodeComposedId would produce "5[0:10,1:11][0:10,1:12]"
+      // instead of "5[0:10,1:12]". Use the original root ID (e.g. "5")
+      // from rootIds instead; fall back to newColumn.id() only for
+      // genuinely new columns whose ID is a fresh numeric string.
+      val rootId = oldRootIds.getOrElse(columnName, newColumn.id())
+      allRootIds(columnName) = rootId
+      val composedId = encodeComposedId(rootId, newNestedFieldIds)
+      newColumn.asInstanceOf[ColumnImpl].copy(id = composedId): Column
+    }
+
+    nestedIdMaps.put(ident, allColumnNestedIds)
+    rootIds.put(ident, allRootIds)
+
+    val composedTable = new InMemoryTable(
+      alteredTable.name,
+      composedColumns,
+      alteredTable.partitioning,
+      alteredTable.properties,
+      alteredTable.constraints,
+      id = alteredTable.id)
+    composedTable.alterTableWithData(alteredTable.data, alteredTable.schema)
+    composedTable.setVersionAndValidatedVersionFrom(alteredTable)
+    tables.put(ident, composedTable)
+    composedTable
+  }
+
+  /**
+   * Recursively assigns fresh IDs to every nested position in a data type:
+   * struct fields, array elements, map keys, and map values.
+   *
+   * Each position is identified by an ordinal path from the column root:
+   *
+   * `STRUCT<name: STRING, addr: STRUCT<city: STRING>>` produces:
+   *   - Seq(0) -> id1       (name, position 0)
+   *   - Seq(1) -> id2       (addr, position 1)
+   *   - Seq(1, 0) -> id3    (addr.city, position 0 within addr)
+   *
+   * `ARRAY<STRUCT<x: INT>>` produces:
+   *   - Seq(0) -> id1       (element, position 0)
+   *   - Seq(0, 0) -> id2    (element.x, position 0 within element)
+   *
+   * `MAP<STRING, STRUCT<v: INT>>` produces:
+   *   - Seq(0) -> id1       (key, position 0)
+   *   - Seq(1) -> id2       (value, position 1)
+   *   - Seq(1, 0) -> id3    (value.v, position 0 within value)
+   */
+  private def assignNestedIds(
+      dataType: DataType,
+      parentPath: Seq[Int],
+      nestedFieldIds: mutable.Map[Seq[Int], Long]): Unit = {
+    dataType match {
+      case structType: StructType =>
+        structType.fields.zipWithIndex.foreach { case (field, idx) =>
+          val fieldPath = parentPath :+ idx
+          nestedFieldIds(fieldPath) = InMemoryBaseTable.nextColumnId()
+          assignNestedIds(field.dataType, fieldPath, nestedFieldIds)
+        }
+      case ArrayType(elementType, _) =>
+        val elementPath = parentPath :+ 0
+        nestedFieldIds(elementPath) = InMemoryBaseTable.nextColumnId()
+        assignNestedIds(elementType, elementPath, nestedFieldIds)
+      case MapType(keyType, valueType, _) =>
+        val keyPath = parentPath :+ 0
+        nestedFieldIds(keyPath) = InMemoryBaseTable.nextColumnId()
+        assignNestedIds(keyType, keyPath, nestedFieldIds)
+        val valuePath = parentPath :+ 1
+        nestedFieldIds(valuePath) = InMemoryBaseTable.nextColumnId()
+        assignNestedIds(valueType, valuePath, nestedFieldIds)
+      case _ => // primitive types have no nested structure
+    }
+  }
+
+  /**
+   * Merges nested IDs from old to new: preserves IDs for ordinal positions
+   * that exist in both old and new types, assigns fresh IDs for new positions.
+   *
+   * For example, if the old type is `STRUCT<name: STRING, age: INT>` with
+   * IDs {Seq(0)->10, Seq(1)->11}, and the new type is
+   * `STRUCT<name: STRING, age: INT>` after drop+re-add of `age`, then `age`
+   * gets a fresh ID 12 because its position was removed and re-added, while
+   * `name` keeps ID 10.
+   */
+  private def mergeNestedIds(
+      oldFieldIds: mutable.Map[Seq[Int], Long],
+      oldType: DataType,
+      newType: DataType): mutable.Map[Seq[Int], Long] = {
+    val mergedFieldIds = mutable.Map[Seq[Int], Long]()
+    walkAndMerge(newType, parentPath = Seq.empty, mergedFieldIds, oldFieldIds)
+    mergedFieldIds
+  }
+
+  /**
+   * Walks the new data type and for each nested position, either preserves
+   * the old ID (if the ordinal path existed before) or assigns a fresh one.
+   */
+  private def walkAndMerge(
+      dataType: DataType,
+      parentPath: Seq[Int],
+      mergedFieldIds: mutable.Map[Seq[Int], Long],
+      oldFieldIds: mutable.Map[Seq[Int], Long]): Unit = {
+    dataType match {
+      case structType: StructType =>
+        structType.fields.zipWithIndex.foreach { case (field, idx) =>
+          val fieldPath = parentPath :+ idx
+          mergedFieldIds(fieldPath) =
+            oldFieldIds.getOrElse(fieldPath, InMemoryBaseTable.nextColumnId())
+          walkAndMerge(field.dataType, fieldPath, mergedFieldIds, oldFieldIds)
+        }
+      case ArrayType(elementType, _) =>
+        val elementPath = parentPath :+ 0
+        mergedFieldIds(elementPath) =
+          oldFieldIds.getOrElse(elementPath, InMemoryBaseTable.nextColumnId())
+        walkAndMerge(elementType, elementPath, mergedFieldIds, oldFieldIds)
+      case MapType(keyType, valueType, _) =>
+        val keyPath = parentPath :+ 0
+        mergedFieldIds(keyPath) =
+          oldFieldIds.getOrElse(keyPath, InMemoryBaseTable.nextColumnId())
+        walkAndMerge(keyType, keyPath, mergedFieldIds, oldFieldIds)
+        val valuePath = parentPath :+ 1
+        mergedFieldIds(valuePath) =
+          oldFieldIds.getOrElse(valuePath, InMemoryBaseTable.nextColumnId())
+        walkAndMerge(valueType, valuePath, mergedFieldIds, oldFieldIds)
+      case _ =>
+    }
+  }
+
+  /**
+   * Encodes a root ID and its nested field IDs into a single deterministic string.
+   * Format: `rootId[path1:id1,path2:id2,...]` with paths sorted
+   * lexicographically by their dot-joined ordinal representation.
+   *
+   * Example: column `person STRUCT<name: STRING, age: INT>` with root ID "5"
+   * and nested field IDs {Seq(0)->10, Seq(1)->11} encodes as:
+   * `"5[0:10,1:11]"`
+   *
+   * If the column has no nested fields (e.g. `INT`), returns just the root ID.
+   */
+  private def encodeComposedId(
+      rootId: String,
+      nestedFieldIds: mutable.Map[Seq[Int], Long]): String = {
+    if (nestedFieldIds.isEmpty) {
+      rootId
+    } else {
+      val sortedEntries = nestedFieldIds.toSeq.sortBy(_._1.mkString("."))
+      val encoded = sortedEntries.map { case (fieldPath, fieldId) =>
+        s"${fieldPath.mkString(".")}:$fieldId"
+      }.mkString(",")
+      s"$rootId[$encoded]"
+    }
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala
index fd2c0f6e9c2ec..8bfcfc020fa12 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala
@@ -20,8 +20,10 @@ package org.apache.spark.sql.connector.catalog
 import java.time.{Instant, ZoneId}
 import java.time.temporal.ChronoUnit
 import java.util
+import java.util.Locale
 import java.util.Objects
 import java.util.OptionalLong
+import java.util.concurrent.atomic.AtomicLong
 
 import scala.collection.mutable
 import scala.collection.mutable.ListBuffer
@@ -38,10 +40,11 @@ import org.apache.spark.sql.connector.metric.{CustomMetric, CustomSumMetric, Cus
 import org.apache.spark.sql.connector.read._
 import org.apache.spark.sql.connector.read.colstats.{ColumnStatistics, Histogram, HistogramBin}
 import org.apache.spark.sql.connector.read.partitioning.{KeyGroupedPartitioning, Partitioning, UnknownPartitioning}
+import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset}
 import org.apache.spark.sql.connector.write._
 import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite}
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend
+import org.apache.spark.sql.internal.connector.{ColumnImpl, SupportsStreamingUpdateAsAppend}
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -72,16 +75,62 @@ abstract class InMemoryBaseTable(
   // Stores the table version validated during the last `ALTER TABLE ... ADD CONSTRAINT` operation.
   private var validatedTableVersion: String = null
 
-  private var tableColumns: Array[Column] = initialColumns
+  // Assign column IDs to columns that do not have one.
+  // This simulates connectors that support column identity tracking.
+  private var tableColumns: Array[Column] = initialColumns.map { c =>
+    if (c.id() == null) {
+      c.asInstanceOf[ColumnImpl].copy(id = InMemoryBaseTable.nextColumnId().toString)
+    } else {
+      c
+    }
+  }
 
   override def columns(): Array[Column] = tableColumns
 
+  private[catalog] def updateColumns(newColumns: Array[Column]): Unit = {
+    tableColumns = newColumns
+  }
+
   override def version(): String = tableVersion.toString
 
   def setVersion(version: String): Unit = {
     tableVersion = version.toInt
   }
 
+  /**
+   * Copies version and validated version from another table.
+   *
+   * Some test catalogs (e.g. [[NullColumnIdInMemoryTableCatalog]],
+   * [[NullTableIdAndNullColumnIdInMemoryTableCatalog]]) create a new table object
+   * that overrides specific behavior (such as nulling out column IDs). The new
+   * object's version counter starts at 0. Without this call, the version counter
+   * resets every time the catalog creates such a replacement table, breaking the
+   * monotonic-version assumption that downstream consumers rely on (e.g.
+   * [[InMemoryTable]].copy, validated-version propagation, and the join-refresh
+   * tests in [[DSv2IncrementallyConstructedQueryTests]]).
+   */
+  def setVersionAndValidatedVersionFrom(sourceTable: InMemoryBaseTable): Unit = {
+    setVersion(sourceTable.version())
+    if (sourceTable.validatedVersion() != null) {
+      setValidatedVersion(sourceTable.validatedVersion())
+    }
+  }
+
+  // Version-aware equality: two tables refer to the same metastore entity at the same state.
+  // Fall back to reference equality when `id()` is null (no metastore identity).
+  override def equals(obj: Any): Boolean = obj match {
+    case other: InMemoryBaseTable =>
+      if (this eq other) true
+      else if (id() == null || other.id() == null) false
+      else id() == other.id() && version() == other.version()
+    case _ => false
+  }
+
+  override def hashCode(): Int = {
+    if (id() == null) System.identityHashCode(this)
+    else java.util.Objects.hash(id(), version())
+  }
+
   def increaseVersion(): Unit = {
     tableVersion += 1
   }
@@ -94,6 +143,8 @@ abstract class InMemoryBaseTable(
     validatedTableVersion = version
   }
 
+  protected def recordScanEvent(filters: Array[Filter]): Unit = {}
+
   protected object PartitionKeyColumn extends MetadataColumn {
     override def name: String = "_partition"
     override def dataType: DataType = StringType
@@ -380,24 +431,52 @@ abstract class InMemoryBaseTable(
   def alterTableWithData(
       data: Array[BufferedRows],
       newSchema: StructType): InMemoryBaseTable = {
+    val newFieldNames = newSchema.fieldNames.toSet
     data.foreach { bufferedRow =>
       val oldSchema = bufferedRow.schema
+
+      // Identify which columns from the old schema still exist in the new schema.
+      // Each entry is (StructField, original index in old row) so we can extract values later.
+      val fieldsRetainedInOldSchema = oldSchema.fields.zipWithIndex.filter {
+        case (oldField, _) => newFieldNames.contains(oldField.name)
+      }
+      val areColumnsDropped = fieldsRetainedInOldSchema.length < oldSchema.length
+
+      // Build a schema that only contains the retained columns.
+      // This becomes the write schema for the migrated rows.
+      val retainedSchemaAfterDroppedColumns = if (areColumnsDropped) {
+        StructType(fieldsRetainedInOldSchema.map(_._1))
+      } else {
+        oldSchema
+      }
+
       bufferedRow.rows.foreach { row =>
+        // Physically remove dropped column values from the row so they do not
+        // survive through ALTER chains (e.g. DROP COLUMN then ADD COLUMN same name).
+        val retainedRowAfterDroppedColumns = if (areColumnsDropped) {
+          new GenericInternalRow(fieldsRetainedInOldSchema.map {
+            case (retainedField, idx) => row.get(idx, retainedField.dataType)
+          })
+        } else {
+          row
+        }
+
         // handle partition evolution by re-keying all data
-        val key = getKey(row, newSchema)
+        val key = getKey(retainedRowAfterDroppedColumns, newSchema)
         dataMap += dataMap.get(key)
           .map { splits =>
             val newSplits = if ((splits.last.rows.size >= numRowsPerSplit) ||
-                (splits.last.schema != oldSchema)) {
-              splits :+ new BufferedRows(key, oldSchema)
+                (splits.last.schema != retainedSchemaAfterDroppedColumns)) {
+              splits :+ new BufferedRows(key, retainedSchemaAfterDroppedColumns)
             } else {
               splits
             }
-            newSplits.last.withRow(row)
+            newSplits.last.withRow(retainedRowAfterDroppedColumns)
             key -> newSplits
           }
           .getOrElse(key -> Seq(
-            new BufferedRows(key, oldSchema).withRow(row)))
+            new BufferedRows(key, retainedSchemaAfterDroppedColumns)
+              .withRow(retainedRowAfterDroppedColumns)))
         addPartitionKey(key)
       }
     }
@@ -406,6 +485,7 @@ abstract class InMemoryBaseTable(
 
   def baseCapabiilities: Set[TableCapability] = Set(
     TableCapability.BATCH_READ,
+    TableCapability.MICRO_BATCH_READ,
     TableCapability.BATCH_WRITE,
     TableCapability.STREAMING_WRITE,
     TableCapability.OVERWRITE_BY_FILTER,
@@ -455,6 +535,8 @@ abstract class InMemoryBaseTable(
       if (evaluableFilters.nonEmpty) {
         scan.filter(evaluableFilters)
       }
+      scan.pushedFilters = _pushedFilters
+      recordScanEvent(_pushedFilters)
       scan
     }
 
@@ -494,6 +576,33 @@ abstract class InMemoryBaseTable(
 
   case class InMemoryHistogram(height: Double, bins: Array[HistogramBin]) extends Histogram
 
+  private class InMemoryTableOffset(val rowCount: Long) extends Offset {
+    override def json(): String = rowCount.toString
+  }
+
+  class InMemoryMicroBatchStream(readSchema: StructType, tableSchema: StructType)
+      extends MicroBatchStream {
+    override def initialOffset(): Offset = new InMemoryTableOffset(0)
+    override def latestOffset(): Offset =
+      new InMemoryTableOffset(InMemoryBaseTable.this.rows.size.toLong)
+    override def planInputPartitions(start: Offset, end: Offset): Array[InputPartition] = {
+      val s = start.asInstanceOf[InMemoryTableOffset].rowCount.toInt
+      val e = end.asInstanceOf[InMemoryTableOffset].rowCount.toInt
+      Array(InMemoryMicroBatchPartition(InMemoryBaseTable.this.rows.slice(s, e)))
+    }
+    override def createReaderFactory(): PartitionReaderFactory = {
+      val metadataColNames = new mutable.ArrayBuffer[String]()
+      readSchema.foreach {
+        case MetadataStructFieldWithLogicalName(_, name) => metadataColNames += name
+        case _ =>
+      }
+      new InMemoryMicroBatchReaderFactory(metadataColNames.toArray)
+    }
+    override def deserializeOffset(json: String): Offset = new InMemoryTableOffset(json.toLong)
+    override def commit(end: Offset): Unit = {}
+    override def stop(): Unit = {}
+  }
+
   abstract class BatchScanBaseClass(
       var data: Seq[InputPartition],
       readSchema: StructType,
@@ -579,6 +688,9 @@ abstract class InMemoryBaseTable(
     override def supportedCustomMetrics(): Array[CustomMetric] = {
       Array(new RowsReadCustomMetric)
     }
+
+    override def toMicroBatchStream(checkpointLocation: String): MicroBatchStream =
+      new InMemoryMicroBatchStream(readSchema, tableSchema)
   }
 
   case class InMemoryBatchScan(
@@ -588,6 +700,12 @@ abstract class InMemoryBaseTable(
       options: CaseInsensitiveStringMap)
     extends BatchScanBaseClass(_data, readSchema, tableSchema) with SupportsRuntimeFiltering {
 
+    // Back-pointer to the table this scan was built against.
+    val table: InMemoryBaseTable = InMemoryBaseTable.this
+
+    // The filters pushed to this scan at build time.
+    var pushedFilters: Array[Filter] = Array.empty
+
     override def filterAttributes(): Array[NamedReference] = {
       val scanFields = readSchema.fields.map(_.name).toSet
       partitioning.flatMap(_.references)
@@ -659,8 +777,13 @@ abstract class InMemoryBaseTable(
 
       override def toBatch: BatchWrite = {
         val newSchema = info.schema()
-        tableColumns = CatalogV2Util.structTypeToV2Columns(
-          mergeSchema(CatalogV2Util.v2ColumnsToStructType(columns()), newSchema))
+        val mergedSchema = mergeSchema(
+          oldType = CatalogV2Util.v2ColumnsToStructType(columns()),
+          newType = newSchema)
+        val newColumns = CatalogV2Util.structTypeToV2Columns(mergedSchema)
+        tableColumns = InMemoryBaseTable.assignMissingIds(
+          oldColumns = columns(),
+          newColumns = newColumns)
         writer
       }
 
@@ -706,30 +829,43 @@ abstract class InMemoryBaseTable(
     }
 
     override def abort(messages: Array[WriterCommitMessage]): Unit = {}
+
+    protected def doCommit(messages: Array[WriterCommitMessage]): Unit
+
+    override final def commit(messages: Array[WriterCommitMessage]): Unit = {
+      doCommit(messages)
+      commits += Commit(Instant.now().toEpochMilli)
+    }
+
+    override final def commit(
+        messages: Array[WriterCommitMessage],
+        summary: WriteSummary): Unit = {
+      doCommit(messages)
+      commits += Commit(Instant.now().toEpochMilli, writeSummary = Some(summary))
+    }
   }
 
   class Append(val info: LogicalWriteInfo) extends TestBatchWrite {
-
-    override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
+    override protected def doCommit(
+        messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       withData(messages.map(_.asInstanceOf[BufferedRows]))
-      commits += Commit(Instant.now().toEpochMilli)
     }
   }
 
   class DynamicOverwrite(val info: LogicalWriteInfo) extends TestBatchWrite {
-    override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
+    override protected def doCommit(
+        messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       val newData = messages.map(_.asInstanceOf[BufferedRows])
       dataMap --= newData.flatMap(_.rows.map(getKey))
       withData(newData)
-      commits += Commit(Instant.now().toEpochMilli)
     }
   }
 
   class TruncateAndAppend(val info: LogicalWriteInfo) extends TestBatchWrite {
-    override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
+    override protected def doCommit(
+        messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       dataMap.clear()
       withData(messages.map(_.asInstanceOf[BufferedRows]))
-      commits += Commit(Instant.now().toEpochMilli)
     }
   }
 
@@ -778,6 +914,34 @@ abstract class InMemoryBaseTable(
 }
 
 object InMemoryBaseTable {
+  private val columnIdGlobalCounter = new AtomicLong(0)
+  def nextColumnId(): Long = columnIdGlobalCounter.incrementAndGet()
+
+  private def normalize(name: String): String = name.toLowerCase(Locale.ROOT)
+
+  /**
+   * Preserves column IDs from `oldColumns` when the column name matches,
+   * and assigns new IDs to columns that do not already have one.
+   *
+   * IDs are preserved across type changes, keeping the same column ID through type
+   * widening and nested field additions. [[TypeChangeResetsColIdTableCatalog]] overrides
+   * this behavior for testing scenarios where type changes should produce a new ID.
+   */
+  def assignMissingIds(
+      oldColumns: Array[Column],
+      newColumns: Array[Column]): Array[Column] = {
+    newColumns.map { newCol =>
+      oldColumns.find(c => normalize(c.name()) == normalize(newCol.name())) match {
+        case Some(oldCol) if oldCol.id() != null =>
+          newCol.asInstanceOf[ColumnImpl].copy(id = oldCol.id())
+        case _ if newCol.id() == null =>
+          newCol.asInstanceOf[ColumnImpl].copy(id = nextColumnId().toString)
+        case _ =>
+          newCol
+      }
+    }
+  }
+
   val SIMULATE_FAILED_WRITE_OPTION = "spark.sql.test.simulateFailedWrite"
 
   def extractValue(
@@ -799,6 +963,11 @@ object InMemoryBaseTable {
   }
 }
 
+/**
+ * A partition for [[InMemoryBaseTable]] micro-batch streaming reads, holding a slice of rows.
+ */
+case class InMemoryMicroBatchPartition(rows: Seq[InternalRow]) extends InputPartition
+
 /**
  * Represent a set of rows buffered in memory for a given partition key.
  * @param key partition key
@@ -826,6 +995,30 @@ class BufferedRows(val key: Seq[Any], val schema: StructType)
   def clear(): Unit = rows.clear()
 }
 
+private class InMemoryMicroBatchReaderFactory(
+    metaNames: Array[String]) extends PartitionReaderFactory with Serializable {
+  override def createReader(partition: InputPartition): PartitionReader[InternalRow] = {
+    val rows = partition.asInstanceOf[InMemoryMicroBatchPartition].rows
+    new PartitionReader[InternalRow] {
+      private var idx = -1
+      override def next(): Boolean = { idx += 1; idx < rows.size }
+      override def get(): InternalRow = {
+        val rawRow = rows(idx)
+        if (metaNames.isEmpty) rawRow
+        else {
+          val metaRow = new GenericInternalRow(metaNames.map {
+            case "index" => idx.asInstanceOf[Any]
+            case "_partition" => UTF8String.fromString("").asInstanceOf[Any]
+            case _ => null
+          })
+          new JoinedRow(rawRow, metaRow)
+        }
+      }
+      override def close(): Unit = {}
+    }
+  }
+}
+
 object BufferedRows {
   def apply(key: Seq[Any], schema: Array[Column]): BufferedRows = {
     new BufferedRows(key, CatalogV2Util.v2ColumnsToStructType(schema))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala
index c47ed2668e3b4..0c1def1ac55c2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
+import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference}
 import org.apache.spark.sql.connector.read._
 import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset}
 import org.apache.spark.sql.types._
@@ -39,15 +40,36 @@ class InMemoryChangelogCatalog extends InMemoryCatalog {
   private val changeData: mutable.Map[String, mutable.ArrayBuffer[InternalRow]] =
     mutable.Map.empty
 
-  // Stores the most recent ChangelogInfo passed to loadChangelog(), so tests can verify
-  // that the parser/DataFrame API correctly constructed and forwarded it.
-  private var _lastChangelogInfo: Option[ChangelogInfo] = None
-  def lastChangelogInfo: Option[ChangelogInfo] = _lastChangelogInfo
+  // Stores the most recent ChangelogContext and options passed to loadChangelog(), so tests
+  // can verify that the parser/DataFrame API correctly constructed and forwarded them.
+  private var _lastChangelogContext: Option[ChangelogContext] = None
+  def lastChangelogContext: Option[ChangelogContext] = _lastChangelogContext
+
+  private var _lastOptions: Option[CaseInsensitiveStringMap] = None
+  def lastOptions: Option[CaseInsensitiveStringMap] = _lastOptions
+
+  // Per-table overrides for Changelog properties (carry-over rows, intermediate changes,
+  // update representation, row identity). Tests can set these to exercise post-processing.
+  private val changelogProperties: mutable.Map[String, ChangelogProperties] =
+    mutable.Map.empty
+
+  /**
+   * Override the [[Changelog]] properties returned for a given table.
+   * Defaults are: containsCarryoverRows=false, containsIntermediateChanges=false,
+   * representsUpdateAsDeleteAndInsert=false, no rowId, no rowVersion.
+   */
+  def setChangelogProperties(
+      ident: Identifier,
+      properties: ChangelogProperties): Unit = {
+    changelogProperties(ident.toString) = properties
+  }
 
   override def loadChangelog(
       ident: Identifier,
-      changelogInfo: ChangelogInfo): Changelog = {
-    _lastChangelogInfo = Some(changelogInfo)
+      changelogContext: ChangelogContext,
+      options: CaseInsensitiveStringMap): Changelog = {
+    _lastChangelogContext = Some(changelogContext)
+    _lastOptions = Some(options)
     if (!tableExists(ident)) {
       throw new NoSuchTableException(ident.asMultipartIdentifier)
     }
@@ -57,9 +79,10 @@ class InMemoryChangelogCatalog extends InMemoryCatalog {
     val numDataCols = table.columns.length
     // _commit_version is at index numDataCols + 1 (after _change_type)
     val commitVersionIdx = numDataCols + 1
-    val filtered = filterByRange(allRows.toSeq, commitVersionIdx, changelogInfo.range())
+    val filtered = filterByRange(allRows.toSeq, commitVersionIdx, changelogContext.range())
+    val props = changelogProperties.getOrElse(ident.toString, ChangelogProperties())
     new InMemoryChangelog(
-      table.name + "_changelog", table.columns, filtered)
+      table.name + "_changelog", table.columns, filtered, props)
   }
 
   /**
@@ -109,30 +132,76 @@ class InMemoryChangelogCatalog extends InMemoryCatalog {
   }
 }
 
+/**
+ * Configurable properties for [[InMemoryChangelog]] that test cases can use to exercise
+ * Spark's post-processing (carry-over removal, update detection, net changes).
+ *
+ * @param containsCarryoverRows whether the change stream may contain identical CoW pairs
+ * @param containsIntermediateChanges whether multiple changes per row may exist
+ * @param representsUpdateAsDeleteAndInsert whether updates appear as raw delete+insert
+ * @param rowIdNames optional row identity columns as top-level names (e.g. Seq("id"))
+ * @param rowIdPaths optional row identity paths for nested struct fields
+ *                   (e.g. Seq(Seq("payload", "id"))); takes precedence over rowIdNames
+ * @param rowVersionName optional row version column (e.g. Some("row_commit_version"));
+ *                       must be a per-row version that distinguishes carry-overs from
+ *                       real updates. Do NOT pass the commit version, which is constant
+ *                       within a partition and would cause every delete+insert pair to
+ *                       look like a carry-over
+ * @param commitTimestampNullable whether the connector declares `_commit_timestamp` as
+ *                                nullable. Defaults to `true`. Tests that need to
+ *                                exercise NullPropagation behaviour on a non-nullable
+ *                                schema can set this to `false`.
+ */
+case class ChangelogProperties(
+    containsCarryoverRows: Boolean = false,
+    containsIntermediateChanges: Boolean = false,
+    representsUpdateAsDeleteAndInsert: Boolean = false,
+    rowIdNames: Seq[String] = Seq.empty,
+    rowIdPaths: Seq[Seq[String]] = Seq.empty,
+    rowVersionName: Option[String] = None,
+    commitTimestampNullable: Boolean = true)
+
 /**
  * A test [[Changelog]] that returns pre-populated change rows.
  *
- * Reports `containsCarryoverRows = false` so Spark skips carry-over removal.
+ * Properties (carry-over presence, update representation, row identity) are configurable
+ * via the [[ChangelogProperties]] parameter so tests can exercise different code paths
+ * in Spark's post-processing analyzer rule.
  */
 class InMemoryChangelog(
     tableName: String,
     dataColumns: Array[Column],
-    changeRows: Seq[InternalRow]) extends Changelog {
+    changeRows: Seq[InternalRow],
+    properties: ChangelogProperties = ChangelogProperties()) extends Changelog {
 
   private val cdcColumns: Array[Column] = dataColumns ++ Array(
     Column.create("_change_type", StringType),
     Column.create("_commit_version", LongType),
-    Column.create("_commit_timestamp", TimestampType))
+    Column.create("_commit_timestamp", TimestampType, properties.commitTimestampNullable))
 
   override def name(): String = tableName
 
   override def columns(): Array[Column] = cdcColumns
 
-  override def containsCarryoverRows(): Boolean = false
+  override def containsCarryoverRows(): Boolean = properties.containsCarryoverRows
+
+  override def containsIntermediateChanges(): Boolean = properties.containsIntermediateChanges
 
-  override def containsIntermediateChanges(): Boolean = false
+  override def representsUpdateAsDeleteAndInsert(): Boolean =
+    properties.representsUpdateAsDeleteAndInsert
 
-  override def representsUpdateAsDeleteAndInsert(): Boolean = false
+  override def rowId(): Array[NamedReference] = {
+    if (properties.rowIdPaths.nonEmpty) {
+      properties.rowIdPaths.map(parts => FieldReference(parts): NamedReference).toArray
+    } else {
+      properties.rowIdNames.map(name => FieldReference.column(name): NamedReference).toArray
+    }
+  }
+
+  override def rowVersion(): NamedReference = properties.rowVersionName match {
+    case Some(name) => FieldReference.column(name)
+    case None => super.rowVersion()
+  }
 
   override def newScanBuilder(
       options: CaseInsensitiveStringMap): ScanBuilder = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryEnhancedPartitionFilterTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryEnhancedPartitionFilterTable.scala
index 4cfca8a62f579..979cf1fded745 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryEnhancedPartitionFilterTable.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryEnhancedPartitionFilterTable.scala
@@ -73,6 +73,15 @@ class InMemoryEnhancedPartitionFilterTable(
         InMemoryEnhancedPartitionFilterTable.AcceptDataPredicatesKey, "false")
         .toBoolean
 
+    // Default false. When true, first-pass partition predicates that are accepted (pushed) are
+    // also returned for post-scan evaluation, simulating a partial pushdown (e.g. a Parquet
+    // row group filter). Such predicates are reported by pushedPredicates() but still appear in
+    // the pushPredicates() return value.
+    private val returnAcceptedPartitionPredicates =
+      InMemoryEnhancedPartitionFilterTable.this.properties.getOrDefault(
+        InMemoryEnhancedPartitionFilterTable.ReturnAcceptedPartitionPredicatesKey, "false")
+        .toBoolean
+
     override def supportsIterativePushdown(): Boolean = true
 
     override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = {
@@ -95,6 +104,10 @@ class InMemoryEnhancedPartitionFilterTable(
             InMemoryTableWithV2Filter.supportsPredicates(Array(p)) =>
           if (acceptPartitionPredicates) {
             firstPassPushedPredicates += p
+            // Simulate partial pushdown: pushed, but still returned for post-scan.
+            if (returnAcceptedPartitionPredicates) {
+              returned += p
+            }
           } else {
             returned += p
           }
@@ -166,4 +179,12 @@ object InMemoryEnhancedPartitionFilterTable {
    * mocking a data source that can evaluate this particular data predicate).
    */
   private[catalog] val AcceptDataPredicatesKey = "accept-data-predicates"
+
+  /**
+   * Table property: when "true", first-pass partition predicates that are accepted (pushed) are
+   * also returned for post-scan evaluation, simulating a partial pushdown (e.g. a Parquet
+   * row group filter). Used to verify that already-pushed predicates are not re-derived as
+   * PartitionPredicates in the second pass.
+   */
+  private[catalog] val ReturnAcceptedPartitionPredicatesKey = "return-accepted-partition-predicates"
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala
index 91e899bc1169e..3af3b0aece5d1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.connector.catalog
 
-import java.time.Instant
 import java.util
 
 import org.apache.spark.sql.catalyst.InternalRow
@@ -26,27 +25,45 @@ import org.apache.spark.sql.connector.catalog.constraints.Constraint
 import org.apache.spark.sql.connector.distributions.{Distribution, Distributions}
 import org.apache.spark.sql.connector.expressions.{FieldReference, LogicalExpressions, NamedReference, SortDirection, SortOrder, Transform}
 import org.apache.spark.sql.connector.read.{Scan, ScanBuilder}
-import org.apache.spark.sql.connector.write.{BatchWrite, DeltaBatchWrite, DeltaWrite, DeltaWriteBuilder, DeltaWriter, DeltaWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, RequiresDistributionAndOrdering, RowLevelOperation, RowLevelOperationBuilder, RowLevelOperationInfo, SupportsDelta, Write, WriteBuilder, WriterCommitMessage, WriteSummary}
+import org.apache.spark.sql.connector.write.{BatchWrite, DeltaBatchWrite, DeltaWrite, DeltaWriteBuilder, DeltaWriter, DeltaWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, RequiresDistributionAndOrdering, RowLevelOperation, RowLevelOperationBuilder, RowLevelOperationInfo, SupportsDelta, Write, WriteBuilder, WriterCommitMessage}
 import org.apache.spark.sql.connector.write.RowLevelOperation.Command
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.ArrayImplicits._
 
-class InMemoryRowLevelOperationTable(
+class InMemoryRowLevelOperationTable private (
     name: String,
-    schema: StructType,
+    columns: Array[Column],
     partitioning: Array[Transform],
     properties: util.Map[String, String],
-    constraints: Array[Constraint] = Array.empty)
+    constraints: Array[Constraint],
+    tableId: String)
   extends InMemoryTable(
     name,
-    CatalogV2Util.structTypeToV2Columns(schema),
+    columns,
     partitioning,
     properties,
-    constraints)
+    constraints,
+    id = tableId)
   with SupportsRowLevelOperations {
 
+  def this(
+      name: String,
+      schema: StructType,
+      partitioning: Array[Transform],
+      properties: util.Map[String, String],
+      constraints: Array[Constraint] = Array.empty,
+      tableId: String = java.util.UUID.randomUUID().toString) = {
+    this(
+      name = name,
+      columns = CatalogV2Util.structTypeToV2Columns(schema),
+      partitioning = partitioning,
+      properties = properties,
+      constraints = constraints,
+      tableId = tableId)
+  }
+
   private final val PARTITION_COLUMN_REF = FieldReference(PartitionKeyColumn.name)
   private final val INDEX_COLUMN_REF = FieldReference(IndexColumn.name)
   private final val SUPPORTS_DELTAS = "supports-deltas"
@@ -62,6 +79,44 @@ class InMemoryRowLevelOperationTable(
   // (operation, id, metadata, row)
   var lastWriteLog: Seq[InternalRow] = Seq.empty
 
+  // Set by InMemoryRowLevelOperationTableCatalog.loadTable when this instance is a snapshot copy
+  // pinned at load time. A real transactional catalog (Delta/Iceberg) persists TRUNCATE through a
+  // commit, but the default SupportsDelete.truncateTable() only mutates this snapshot's own data,
+  // which is then discarded. Forward the wipe to the live table so TRUNCATE TABLE behaves like a
+  // real catalog rather than silently no-op'ing.
+  private[catalog] var liveTableForTruncate: Option[InMemoryRowLevelOperationTable] = None
+
+  override def truncateTable(): Boolean = liveTableForTruncate match {
+    case Some(live) => live.truncateTable() // live has no back-ref -> default truncate on itself
+    case None => super.truncateTable()
+  }
+
+  override def copy(): Table = {
+    val copied = InMemoryRowLevelOperationTable.withColumns(
+      name = name,
+      columns = columns(),
+      partitioning = partitioning,
+      properties = properties,
+      constraints = constraints,
+      tableId = id)
+    dataMap.synchronized {
+      dataMap.foreach { case (key, splits) =>
+        val copiedSplits = splits.map { bufferedRows =>
+          val copiedBufferedRows = new BufferedRows(bufferedRows.key, bufferedRows.schema)
+          copiedBufferedRows.rows ++= bufferedRows.rows.map(_.copy())
+          copiedBufferedRows
+        }
+        copied.dataMap.put(key, copiedSplits)
+      }
+    }
+    copied.commits ++= commits.map(_.copy())
+    copied.setVersionAndValidatedVersionFrom(this)
+    copied.replacedPartitions = replacedPartitions
+    copied.lastWriteInfo = lastWriteInfo
+    copied.lastWriteLog = lastWriteLog
+    copied
+  }
+
   override def newRowLevelOperationBuilder(
       info: RowLevelOperationInfo): RowLevelOperationBuilder = {
     if (properties.getOrDefault(SUPPORTS_DELTAS, "false") == "true") {
@@ -125,18 +180,11 @@ class InMemoryRowLevelOperationTable(
     override def description(): String = "InMemoryPartitionReplaceOperation"
   }
 
-  abstract class RowLevelOperationBatchWrite extends TestBatchWrite {
-
-    override def commit(messages: Array[WriterCommitMessage], metrics: WriteSummary): Unit = {
-      commit(messages)
-      commits += Commit(Instant.now().toEpochMilli, Some(metrics))
-    }
-  }
-
   private case class PartitionBasedReplaceData(scan: InMemoryBatchScan)
-    extends RowLevelOperationBatchWrite {
+    extends TestBatchWrite {
 
-    override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
+    override protected def doCommit(
+        messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       val newData = messages.map(_.asInstanceOf[BufferedRows])
       val readRows = scan.data.flatMap(_.asInstanceOf[BufferedRows].rows)
       val readPartitions = readRows.map(r => getKey(r, schema)).distinct
@@ -198,12 +246,12 @@ class InMemoryRowLevelOperationTable(
     }
   }
 
-  private object TestDeltaBatchWrite extends RowLevelOperationBatchWrite with DeltaBatchWrite{
+  private object TestDeltaBatchWrite extends TestBatchWrite with DeltaBatchWrite {
     override def createBatchWriterFactory(info: PhysicalWriteInfo): DeltaWriterFactory = {
       new DeltaBufferedRowsWriterFactory(CatalogV2Util.v2ColumnsToStructType(columns()))
     }
 
-    override def commit(messages: Array[WriterCommitMessage]): Unit = {
+    override protected def doCommit(messages: Array[WriterCommitMessage]): Unit = {
       val newData = messages.map(_.asInstanceOf[BufferedRows])
       withDeletes(newData)
       withData(newData, columns())
@@ -264,3 +312,16 @@ private class DeltaBufferWriter(schema: StructType) extends BufferWriter(schema)
 
   override def commit(): WriterCommitMessage = buffer
 }
+
+object InMemoryRowLevelOperationTable {
+  def withColumns(
+      name: String,
+      columns: Array[Column],
+      partitioning: Array[Transform],
+      properties: util.Map[String, String],
+      constraints: Array[Constraint] = Array.empty,
+      tableId: String = java.util.UUID.randomUUID().toString): InMemoryRowLevelOperationTable = {
+    new InMemoryRowLevelOperationTable(
+      name, columns, partitioning, properties, constraints, tableId)
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala
index bbb9041bab37c..cdc59ff637c0b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala
@@ -17,11 +17,52 @@
 
 package org.apache.spark.sql.connector.catalog
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
+import org.apache.spark.sql.connector.catalog.transactions.{Transaction, TransactionInfo}
+import org.apache.spark.sql.types.StructType
 
-class InMemoryRowLevelOperationTableCatalog extends InMemoryTableCatalog {
+class InMemoryRowLevelOperationTableCatalog
+    extends InMemoryTableCatalog
+    with TransactionalCatalogPlugin {
   import CatalogV2Implicits._
 
+  // The current active transaction.
+  var transaction: Txn = _
+  // The last completed transaction.
+  var lastTransaction: Txn = _
+  // All transactions in order (committed and aborted), allowing per-statement
+  // validation in SQL scripting tests.
+  val observedTransactions: ArrayBuffer[Txn] = new ArrayBuffer[Txn]()
+  // Test-only knob. When true, the next transaction created by `beginTransaction` will reject
+  // register-scans calls (`registerScans` returns false unconditionally). Reset after consumed.
+  var nextTxnRejectRegisteredScansAttempt: Boolean = false
+
+  // Each `loadTable` returns a fresh snapshot pinned at the current table version (id is
+  // preserved). This is the "pin at table loading" semantics that lets version-aware
+  // `Table.equals` catch staleness: a cached relation holds a copy frozen at V1; a later load
+  // returns a copy at V2, and the two compare unequal so cache substitution fails before
+  // `Transaction.registerScans` is consulted.
+  override def loadTable(ident: Identifier): Table = {
+    liveTable(ident) match {
+      case rlot: InMemoryRowLevelOperationTable =>
+        val snapshot = rlot.copy().asInstanceOf[InMemoryRowLevelOperationTable]
+        snapshot.liveTableForTruncate = Some(rlot)
+        snapshot
+      case other => other
+    }
+  }
+
+  override def beginTransaction(info: TransactionInfo): Transaction = {
+    assert(transaction == null || transaction.currentState != Active)
+    val txn = new Txn(new TxnTableCatalog(this))
+    txn.rejectRegisteredScansAttempt = nextTxnRejectRegisteredScansAttempt
+    nextTxnRejectRegisteredScansAttempt = false
+    this.transaction = txn
+    txn
+  }
+
   override def createTable(ident: Identifier, tableInfo: TableInfo): Table = {
     if (tables.containsKey(ident)) {
       throw new TableAlreadyExistsException(ident.asMultipartIdentifier)
@@ -41,11 +82,7 @@ class InMemoryRowLevelOperationTableCatalog extends InMemoryTableCatalog {
   override def alterTable(ident: Identifier, changes: TableChange*): Table = {
     val table = loadTable(ident).asInstanceOf[InMemoryRowLevelOperationTable]
     val properties = CatalogV2Util.applyPropertiesChanges(table.properties, changes)
-    val schema = CatalogV2Util.applySchemaChanges(
-      table.schema,
-      changes,
-      tableProvider = Some("in-memory"),
-      statementType = "ALTER TABLE")
+    val schema = computeAlterTableSchema(table.schema, changes.toSeq)
     val partitioning = CatalogV2Util.applyClusterByChanges(table.partitioning, schema, changes)
     val constraints = CatalogV2Util.collectConstraintChanges(table, changes)
 
@@ -54,18 +91,34 @@ class InMemoryRowLevelOperationTableCatalog extends InMemoryTableCatalog {
       throw new IllegalArgumentException(s"Cannot drop all fields")
     }
 
-    val newTable = new InMemoryRowLevelOperationTable(
+    val columnsWithIds = InMemoryBaseTable.assignMissingIds(
+      oldColumns = table.columns(),
+      newColumns = CatalogV2Util.structTypeToV2Columns(schema))
+
+    val newTable = InMemoryRowLevelOperationTable.withColumns(
       name = table.name,
-      schema = schema,
+      columns = columnsWithIds,
       partitioning = partitioning,
       properties = properties,
-      constraints = constraints)
+      constraints = constraints,
+      tableId = table.id)
     newTable.alterTableWithData(table.data, schema)
+    newTable.setVersionAndValidatedVersionFrom(table)
 
     tables.put(ident, newTable)
 
     newTable
   }
+
+  /**
+   * Computes the schema that would result from applying `changes` to `currentSchema`.
+   * Can be overridden by subclasses to simulate catalogs that selectively ignore changes
+   * (e.g. [[PartialSchemaEvolutionCatalog]]).
+   */
+  def computeAlterTableSchema(currentSchema: StructType, changes: Seq[TableChange]): StructType = {
+    CatalogV2Util.applySchemaChanges(
+      currentSchema, changes, tableProvider = Some("in-memory"), statementType = "ALTER TABLE")
+  }
 }
 
 /**
@@ -84,14 +137,21 @@ class PartialSchemaEvolutionCatalog extends InMemoryRowLevelOperationTableCatalo
       case _ => false
     }
     val properties = CatalogV2Util.applyPropertiesChanges(table.properties, propertyChanges)
+    val schema = computeAlterTableSchema(table.schema, changes.toSeq)
     val newTable = new InMemoryRowLevelOperationTable(
       name = table.name,
-      schema = table.schema,
+      schema = schema,
       partitioning = table.partitioning,
       properties = properties,
       constraints = table.constraints)
     newTable.alterTableWithData(table.data, table.schema)
+    newTable.setVersionAndValidatedVersionFrom(table)
     tables.put(ident, newTable)
     newTable
   }
+
+  // Ignores all schema changes and returns the current schema unchanged.
+  override def computeAlterTableSchema(
+      currentSchema: StructType,
+      changes: Seq[TableChange]): StructType = currentSchema
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala
index d5738475031dc..c783bfbece149 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.connector.write.{LogicalWriteInfo, SupportsOverwrite
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{LongType, StructField, StructType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.ArrayImplicits._
 
 /**
@@ -157,10 +158,7 @@ class InMemoryTable(
 
     copiedTable.commits ++= commits.map(_.copy())
 
-    copiedTable.setVersion(version())
-    if (validatedVersion() != null) {
-      copiedTable.setValidatedVersion(validatedVersion())
-    }
+    copiedTable.setVersionAndValidatedVersionFrom(this)
 
     copiedTable
   }
@@ -204,7 +202,8 @@ class InMemoryTable(
 
   private class Overwrite(filters: Array[Filter]) extends TestBatchWrite {
     import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper
-    override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
+    override protected def doCommit(
+        messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       val deleteKeys = InMemoryTable.filtersToKeys(
         dataMap.keys, partCols.map(_.toSeq.quoted).toImmutableArraySeq, filters)
       dataMap --= deleteKeys
@@ -215,6 +214,15 @@ class InMemoryTable(
 
 object InMemoryTable {
 
+  // Convert UTF8String to string to make sure equality checks between filters and partitions
+  // work correctly.
+  private def valuesEqual(filterValue: Any, partitionValue: Any): Boolean =
+    (filterValue, partitionValue) match {
+      case (s: String, u: UTF8String) => u.toString == s
+      case (u: UTF8String, s: String) => u.toString == s
+      case _ => filterValue == partitionValue
+    }
+
   def filtersToKeys(
       keys: Iterable[Seq[Any]],
       partitionNames: Seq[String],
@@ -222,7 +230,7 @@ object InMemoryTable {
     keys.filter { partValues =>
       filters.flatMap(splitAnd).forall {
         case EqualTo(attr, value) =>
-          value == InMemoryBaseTable.extractValue(attr, partitionNames, partValues)
+          valuesEqual(value, InMemoryBaseTable.extractValue(attr, partitionNames, partValues))
         case EqualNullSafe(attr, value) =>
           val attrVal = InMemoryBaseTable.extractValue(attr, partitionNames, partValues)
           if (attrVal == null && value == null) {
@@ -230,7 +238,7 @@ object InMemoryTable {
           } else if (attrVal == null || value == null) {
             false
           } else {
-            value == attrVal
+            valuesEqual(value, attrVal)
           }
         case IsNull(attr) =>
           null == InMemoryBaseTable.extractValue(attr, partitionNames, partValues)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala
index ff7995ad6697e..c9a6c4acfa014 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala
@@ -40,7 +40,7 @@ class BasicInMemoryTableCatalog extends TableCatalog {
   protected val namespaces: util.Map[List[String], Map[String, String]] =
     new ConcurrentHashMap[List[String], Map[String, String]]()
 
-  protected val tables: util.Map[Identifier, Table] =
+  protected var tables: util.Map[Identifier, Table] =
     new ConcurrentHashMap[Identifier, Table]()
 
   private val invalidatedTables: util.Set[Identifier] = ConcurrentHashMap.newKeySet()
@@ -71,6 +71,14 @@ class BasicInMemoryTableCatalog extends TableCatalog {
     }
   }
 
+  // Returns the underlying live instance without copying. Used by tests that need to mutate
+  // state in a way that's observable to subsequent `loadTable` callers, and by wrappers that
+  // need to propagate writes to the live state.
+  def liveTable(ident: Identifier): Table = {
+    Option(tables.get(ident)).getOrElse(
+      throw new NoSuchTableException(ident.asMultipartIdentifier))
+  }
+
   // load table for writes
   override def loadTable(
       ident: Identifier,
@@ -179,25 +187,45 @@ class BasicInMemoryTableCatalog extends TableCatalog {
       throw new IllegalArgumentException(s"Cannot drop all fields")
     }
 
+    // Compute the intermediate schema that only reflects column deletions.
+    // [[InMemoryBaseTable.alterTableWithData]] decides which old-row fields to keep by
+    // matching names against its newSchema argument. Passing this post-drop schema
+    // (rather than the final schema that may re-add a same-named column) ensures that
+    // dropped column values are physically removed from existing data.
+    // Note: this only handles top-level column deletions. Nested column deletions
+    // would need additional handling, but [[alterTableWithData]] only filters by
+    // top-level field name anyway.
+    val deletedTopLevelNames = changes.collect {
+      case d: TableChange.DeleteColumn if d.fieldNames.length == 1 => d.fieldNames.head
+    }.toSet
+    val schemaAfterDrops = if (deletedTopLevelNames.nonEmpty) {
+      StructType(table.schema.fields.filterNot(f => deletedTopLevelNames(f.name)))
+    } else {
+      schema
+    }
+
     table.increaseVersion()
     val currentVersion = table.version()
+    val columnsWithIds = InMemoryBaseTable.assignMissingIds(
+      oldColumns = table.columns(),
+      newColumns = CatalogV2Util.structTypeToV2Columns(schema))
     val newTable = table match {
       case _: InMemoryTable =>
         new InMemoryTable(
           name = table.name,
-          columns = CatalogV2Util.structTypeToV2Columns(schema),
+          columns = columnsWithIds,
           partitioning = finalPartitioning,
           properties = properties,
           constraints = constraints,
           id = table.id)
-          .alterTableWithData(table.data, schema)
+          .alterTableWithData(table.data, schemaAfterDrops)
       case _: InMemoryTableWithV2Filter =>
         new InMemoryTableWithV2Filter(
           name = table.name,
-          columns = CatalogV2Util.structTypeToV2Columns(schema),
+          columns = columnsWithIds,
           partitioning = finalPartitioning,
           properties = properties)
-          .alterTableWithData(table.data, schema)
+          .alterTableWithData(table.data, schemaAfterDrops)
       case other =>
         throw new UnsupportedOperationException(
           s"Unsupported InMemoryBaseTable subclass: ${other.getClass.getName}")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableViewCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableViewCatalog.scala
new file mode 100644
index 0000000000000..a3506938dea7c
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableViewCatalog.scala
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+import java.util.concurrent.ConcurrentHashMap
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, NoSuchViewException, TableAlreadyExistsException, ViewAlreadyExistsException}
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+/**
+ * An in-memory [[TableViewCatalog]] for tests. Tables and views share a single keyspace per
+ * the [[TableViewCatalog]] contract; the stored value's runtime type ([[TableInfo]] vs
+ * [[ViewInfo]]) is the kind discriminator. Also implements [[SupportsNamespaces]] with a
+ * minimal namespace store, so analyzer rules that read namespace metadata (e.g.
+ * `ApplyDefaultCollation` consulting `loadNamespaceMetadata` for `PROP_COLLATION`) work
+ * uniformly with the v1 session catalog. Suitable for any test suite that wants to exercise
+ * v2 view DDL or inspection commands against a non-session catalog.
+ */
+class InMemoryTableViewCatalog extends TableViewCatalog with SupportsNamespaces {
+
+  private val store =
+    new ConcurrentHashMap[(Seq[String], String), TableInfo]()
+  private val namespaces =
+    new ConcurrentHashMap[Seq[String], util.Map[String, String]]()
+
+  override def loadTableOrView(ident: Identifier): Table = {
+    val key = (ident.namespace().toSeq, ident.name())
+    Option(store.get(key))
+      .map(new MetadataTable(_, ident.toString))
+      .getOrElse(throw new NoSuchTableException(ident))
+  }
+
+  // ----- TableCatalog -----------------------------------------------------------------
+
+  override def createTable(ident: Identifier, info: TableInfo): Table = {
+    val key = (ident.namespace().toSeq, ident.name())
+    if (store.putIfAbsent(key, info) != null) {
+      throw new TableAlreadyExistsException(ident)
+    }
+    new MetadataTable(info, ident.toString)
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    throw new UnsupportedOperationException("alterTable not supported on InMemoryTableViewCatalog")
+  }
+
+  override def dropTable(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = store.get(key)
+    if (existing == null || existing.isInstanceOf[ViewInfo]) return false
+    store.remove(key) != null
+  }
+
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    val oldKey = (oldIdent.namespace().toSeq, oldIdent.name())
+    val newKey = (newIdent.namespace().toSeq, newIdent.name())
+    val existing = store.get(oldKey)
+    if (existing == null || existing.isInstanceOf[ViewInfo]) {
+      throw new NoSuchTableException(oldIdent)
+    }
+    if (store.putIfAbsent(newKey, existing) != null) {
+      throw new TableAlreadyExistsException(newIdent)
+    }
+    store.remove(oldKey)
+  }
+
+  override def listTables(namespace: Array[String]): Array[Identifier] = {
+    val target = namespace.toSeq
+    val ids = new java.util.ArrayList[Identifier]()
+    store.forEach { (key, info) =>
+      if (key._1 == target && !info.isInstanceOf[ViewInfo]) {
+        ids.add(Identifier.of(key._1.toArray, key._2))
+      }
+    }
+    ids.toArray(new Array[Identifier](0))
+  }
+
+  // ----- ViewCatalog ------------------------------------------------------------------
+
+  override def listViews(namespace: Array[String]): Array[Identifier] = {
+    val target = namespace.toSeq
+    val ids = new java.util.ArrayList[Identifier]()
+    store.forEach { (key, info) =>
+      if (key._1 == target && info.isInstanceOf[ViewInfo]) {
+        ids.add(Identifier.of(key._1.toArray, key._2))
+      }
+    }
+    ids.toArray(new Array[Identifier](0))
+  }
+
+  override def createView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    if (store.putIfAbsent(key, info) != null) {
+      throw new ViewAlreadyExistsException(ident)
+    }
+    info
+  }
+
+  override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = store.get(key)
+    if (existing == null || !existing.isInstanceOf[ViewInfo]) {
+      throw new NoSuchViewException(ident)
+    }
+    store.put(key, info)
+    info
+  }
+
+  override def dropView(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = store.get(key)
+    if (existing == null || !existing.isInstanceOf[ViewInfo]) return false
+    store.remove(key) != null
+  }
+
+  override def renameView(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    val oldKey = (oldIdent.namespace().toSeq, oldIdent.name())
+    val newKey = (newIdent.namespace().toSeq, newIdent.name())
+    val existing = store.get(oldKey)
+    if (existing == null || !existing.isInstanceOf[ViewInfo]) {
+      throw new NoSuchViewException(oldIdent)
+    }
+    if (store.putIfAbsent(newKey, existing) != null) {
+      throw new ViewAlreadyExistsException(newIdent)
+    }
+    store.remove(oldKey)
+  }
+
+  // ----- SupportsNamespaces -----------------------------------------------------------
+
+  // A namespace exists if it was explicitly created or if any stored entry sits under it.
+  private def implicitNamespaces: Set[Seq[String]] =
+    store.keySet.asScala.iterator.map(_._1).toSet
+
+  override def listNamespaces(): Array[Array[String]] = {
+    val all = (namespaces.keySet.asScala ++ implicitNamespaces).toSet
+    all.iterator.filter(_.nonEmpty).map(ns => Array(ns.head)).toArray.distinct
+  }
+
+  override def listNamespaces(parent: Array[String]): Array[Array[String]] = {
+    val parentSeq = parent.toSeq
+    val all = (namespaces.keySet.asScala ++ implicitNamespaces).toSet
+    all.iterator
+      .filter(_.size > parentSeq.size)
+      .filter(_.startsWith(parentSeq))
+      .map(_.take(parentSeq.size + 1).toArray)
+      .toArray
+      .distinct
+  }
+
+  override def namespaceExists(namespace: Array[String]): Boolean = {
+    val ns = namespace.toSeq
+    namespaces.containsKey(ns) || implicitNamespaces.exists(_.startsWith(ns))
+  }
+
+  override def loadNamespaceMetadata(namespace: Array[String]): util.Map[String, String] = {
+    val ns = namespace.toSeq
+    Option(namespaces.get(ns)) match {
+      case Some(metadata) => metadata
+      case _ if namespaceExists(namespace) => util.Collections.emptyMap[String, String]
+      case _ => throw new NoSuchNamespaceException(name() +: namespace)
+    }
+  }
+
+  override def createNamespace(
+      namespace: Array[String],
+      metadata: util.Map[String, String]): Unit = {
+    val ns = namespace.toSeq
+    if (namespaces.putIfAbsent(ns, new util.HashMap[String, String](metadata)) != null) {
+      throw new NamespaceAlreadyExistsException(namespace)
+    }
+  }
+
+  override def alterNamespace(
+      namespace: Array[String],
+      changes: NamespaceChange*): Unit = {
+    val ns = namespace.toSeq
+    val current = Option(namespaces.get(ns)).getOrElse {
+      if (!namespaceExists(namespace)) {
+        throw new NoSuchNamespaceException(name() +: namespace)
+      }
+      new util.HashMap[String, String]()
+    }
+    val updated = CatalogV2Util.applyNamespaceChanges(current, changes.toSeq)
+    namespaces.put(ns, updated)
+  }
+
+  override def dropNamespace(namespace: Array[String], cascade: Boolean): Boolean = {
+    val ns = namespace.toSeq
+    if (!cascade && implicitNamespaces.exists(_.startsWith(ns))) {
+      throw new org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException(
+        name() +: namespace)
+    }
+    if (cascade) {
+      val keysToRemove = store.keySet.asScala.filter(_._1.startsWith(ns)).toSeq
+      keysToRemove.foreach(store.remove)
+    }
+    namespaces.remove(ns) != null || implicitNamespaces.exists(_.startsWith(ns))
+  }
+
+  // Test-only accessors --------------------------------------------------------------
+
+  /** Returns the stored entry (table or view) for the identifier, or throws if missing. */
+  def getStoredInfo(namespace: Array[String], name: String): TableInfo = {
+    Option(store.get((namespace.toSeq, name))).getOrElse {
+      throw new NoSuchTableException(Identifier.of(namespace, name))
+    }
+  }
+
+  /** Returns the stored ViewInfo, or throws if the entry is missing or is not a view. */
+  def getStoredView(namespace: Array[String], name: String): ViewInfo = {
+    getStoredInfo(namespace, name) match {
+      case v: ViewInfo => v
+      case _ => throw new IllegalStateException(
+        s"stored entry at ${namespace.mkString(".")}.$name is not a view")
+    }
+  }
+
+  // CatalogPlugin --------------------------------------------------------------------
+
+  private var catalogName: String = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+  }
+  override def name(): String = catalogName
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithTableSample.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithTableSample.scala
new file mode 100644
index 0000000000000..514a7f3beda40
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithTableSample.scala
@@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+import java.util.Locale
+
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.connector.expressions.filter.Predicate
+import org.apache.spark.sql.connector.join.JoinType
+import org.apache.spark.sql.connector.read.{InputPartition, SampleMethod, Scan, ScanBuilder, SupportsPushDownJoin, SupportsPushDownTableSample, SupportsPushDownV2Filters}
+import org.apache.spark.sql.connector.read.SupportsPushDownJoin.ColumnWithAlias
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
+import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * An in-memory table that supports TABLESAMPLE pushdown (both BERNOULLI and SYSTEM).
+ *
+ * For SYSTEM sampling, entire splits (InputPartitions) are included or skipped based on
+ * a hash of their index and the seed. For BERNOULLI sampling, the pushdown is accepted
+ * but rows are not actually filtered (Spark's row-level Sample operator handles it).
+ */
+class InMemoryTableWithTableSample(
+    name: String,
+    columns: Array[Column],
+    partitioning: Array[Transform],
+    properties: util.Map[String, String])
+  extends InMemoryBaseTable(name, columns, partitioning, properties) {
+
+  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+    InMemoryBaseTable.maybeSimulateFailedTableWrite(new CaseInsensitiveStringMap(properties))
+    InMemoryBaseTable.maybeSimulateFailedTableWrite(info.options)
+    new InMemoryWriterBuilder(info) {
+      override def truncate(): WriteBuilder = {
+        writer = new TruncateAndAppend(this.info)
+        streamingWriter = new StreamingTruncateAndAppend(this.info)
+        this
+      }
+    }
+  }
+
+  override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
+    new InMemoryTableSampleScanBuilder(schema, options)
+  }
+
+  class InMemoryTableSampleScanBuilder(
+      tableSchema: StructType,
+      options: CaseInsensitiveStringMap)
+    extends InMemoryScanBuilder(tableSchema, options) with SupportsPushDownTableSample {
+
+    private var sampleFraction: Double = 1.0
+    private var sampleSeed: Long = 0L
+    private var sampleMethod: SampleMethod = SampleMethod.BERNOULLI
+    private var sampleWithReplacement: Boolean = false
+    private var samplePushed: Boolean = false
+
+    override def pushTableSample(
+        lowerBound: Double,
+        upperBound: Double,
+        withReplacement: Boolean,
+        seed: Long): Boolean = {
+      this.sampleFraction = upperBound - lowerBound
+      this.sampleSeed = seed
+      this.sampleMethod = SampleMethod.BERNOULLI
+      this.sampleWithReplacement = withReplacement
+      this.samplePushed = true
+      true
+    }
+
+    override def pushTableSample(
+        lowerBound: Double,
+        upperBound: Double,
+        withReplacement: Boolean,
+        seed: Long,
+        sampleMethod: SampleMethod): Boolean = {
+      this.sampleFraction = upperBound - lowerBound
+      this.sampleSeed = seed
+      this.sampleMethod = sampleMethod
+      this.sampleWithReplacement = withReplacement
+      this.samplePushed = true
+      true
+    }
+
+    override def build: Scan = {
+      val allPartitions = data.map(_.asInstanceOf[InputPartition]).toImmutableArraySeq
+      val filteredPartitions = if (samplePushed && sampleMethod == SampleMethod.SYSTEM) {
+        // SYSTEM sampling: include/skip entire splits based on hash of index + seed
+        allPartitions.zipWithIndex.filter { case (_, idx) =>
+          val hash = ((idx.toLong * 31 + sampleSeed) & Long.MaxValue).toDouble / Long.MaxValue
+          hash < sampleFraction
+        }.map(_._1)
+      } else {
+        allPartitions
+      }
+      if (samplePushed) {
+        new InMemoryBatchScanWithSample(
+          filteredPartitions, schema, tableSchema, options,
+          sampleFraction, sampleSeed, sampleMethod, sampleWithReplacement)
+      } else {
+        InMemoryBatchScan(filteredPartitions, schema, tableSchema, options)
+      }
+    }
+  }
+
+  private class InMemoryBatchScanWithSample(
+      data: Seq[InputPartition],
+      readSchema: StructType,
+      tableSchema: StructType,
+      options: CaseInsensitiveStringMap,
+      sampleFraction: Double,
+      sampleSeed: Long,
+      sampleMethod: SampleMethod,
+      sampleWithReplacement: Boolean)
+    extends InMemoryBatchScan(data, readSchema, tableSchema, options) {
+
+    override def description(): String = {
+      val pct = sampleFraction * 100
+      val method = sampleMethod.toString.toUpperCase(Locale.ROOT)
+      s"${super.description()} $method SAMPLE ($pct) $sampleWithReplacement SEED($sampleSeed)"
+    }
+  }
+}
+
+/**
+ * An in-memory table that supports both TABLESAMPLE pushdown and JOIN pushdown.
+ * Used to test the guard that prevents join pushdown when a side has a pushed sample.
+ */
+class InMemoryTableWithJoinAndSample(
+    name: String,
+    columns: Array[Column],
+    partitioning: Array[Transform],
+    properties: util.Map[String, String])
+  extends InMemoryTableWithTableSample(name, columns, partitioning, properties) {
+
+  override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
+    new InMemoryJoinAndSampleScanBuilder(schema, options)
+  }
+
+  class InMemoryJoinAndSampleScanBuilder(
+      tableSchema: StructType,
+      options: CaseInsensitiveStringMap)
+    extends InMemoryTableSampleScanBuilder(tableSchema, options)
+      with SupportsPushDownJoin with SupportsPushDownV2Filters {
+
+    private[catalog] val ownSchema: StructType = tableSchema
+    private var pushed: Array[Predicate] = Array.empty
+    private var joinedSchema: Option[StructType] = None
+
+    override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = {
+      pushed = predicates
+      // Return empty - all predicates accepted (not actually filtered, just cleared
+      // so that the join pushdown pattern's Nil filter requirement is satisfied).
+      Array.empty
+    }
+
+    // Override V1 pushFilters (inherited from InMemoryScanBuilder) to also accept all
+    // filters. PushDownUtils.pushFilters matches SupportsPushDownFilters before
+    // SupportsPushDownV2Filters, so without this override isnotnull predicates remain
+    // as post-scan Filter nodes and block the join pushdown pattern match.
+    override def pushFilters(filters: Array[Filter]): Array[Filter] = {
+      Array.empty
+    }
+
+    override def pushedPredicates(): Array[Predicate] = pushed
+
+    override def isOtherSideCompatibleForJoin(other: SupportsPushDownJoin): Boolean = true
+
+    override def pushDownJoin(
+        other: SupportsPushDownJoin,
+        joinType: JoinType,
+        leftSideRequiredColumnsWithAliases: Array[ColumnWithAlias],
+        rightSideRequiredColumnsWithAliases: Array[ColumnWithAlias],
+        condition: Predicate): Boolean = {
+      val otherSchema = other.asInstanceOf[InMemoryJoinAndSampleScanBuilder].ownSchema
+      val leftFields = leftSideRequiredColumnsWithAliases.map { col =>
+        val name = if (col.alias() != null) col.alias() else col.colName()
+        tableSchema(col.colName()).copy(name = name)
+      }
+      val rightFields = rightSideRequiredColumnsWithAliases.map { col =>
+        val name = if (col.alias() != null) col.alias() else col.colName()
+        otherSchema(col.colName()).copy(name = name)
+      }
+      joinedSchema = Some(StructType(leftFields ++ rightFields))
+      true
+    }
+
+    override def build: Scan = {
+      joinedSchema match {
+        case Some(js) =>
+          InMemoryBatchScan(
+            data.map(_.asInstanceOf[InputPartition]).toImmutableArraySeq,
+            js, tableSchema, options)
+        case None => super.build
+      }
+    }
+  }
+}
+
+/**
+ * An in-memory table that supports TABLESAMPLE pushdown using only the legacy 4-arg
+ * pushTableSample method (does NOT override the 5-arg default). Used to test backward
+ * compatibility: BERNOULLI should push down via the default delegation, and SYSTEM
+ * should fail because the default returns false for SYSTEM.
+ */
+class InMemoryTableWithLegacyTableSample(
+    name: String,
+    columns: Array[Column],
+    partitioning: Array[Transform],
+    properties: util.Map[String, String])
+  extends InMemoryBaseTable(name, columns, partitioning, properties) {
+
+  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+    InMemoryBaseTable.maybeSimulateFailedTableWrite(new CaseInsensitiveStringMap(properties))
+    InMemoryBaseTable.maybeSimulateFailedTableWrite(info.options)
+    new InMemoryWriterBuilder(info) {
+      override def truncate(): WriteBuilder = {
+        writer = new TruncateAndAppend(this.info)
+        streamingWriter = new StreamingTruncateAndAppend(this.info)
+        this
+      }
+    }
+  }
+
+  override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
+    new InMemoryLegacySampleScanBuilder(schema, options)
+  }
+
+  class InMemoryLegacySampleScanBuilder(
+      tableSchema: StructType,
+      options: CaseInsensitiveStringMap)
+    extends InMemoryScanBuilder(tableSchema, options) with SupportsPushDownTableSample {
+
+    // Only the 4-arg method is overridden; the 5-arg default method is inherited.
+    override def pushTableSample(
+        lowerBound: Double,
+        upperBound: Double,
+        withReplacement: Boolean,
+        seed: Long): Boolean = true
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithTableSampleCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithTableSampleCatalog.scala
new file mode 100644
index 0000000000000..12da978ea11a0
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithTableSampleCatalog.scala
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+
+import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
+import org.apache.spark.sql.connector.expressions.Transform
+
+class InMemoryTableWithTableSampleCatalog extends InMemoryTableCatalog {
+  import CatalogV2Implicits._
+
+  override def createTable(
+      ident: Identifier,
+      columns: Array[Column],
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    if (tables.containsKey(ident)) {
+      throw new TableAlreadyExistsException(ident.asMultipartIdentifier)
+    }
+
+    InMemoryTableCatalog.maybeSimulateFailedTableCreation(properties)
+
+    val tableName = s"$name.${ident.quoted}"
+    val table = new InMemoryTableWithTableSample(tableName, columns, partitions, properties)
+    tables.put(ident, table)
+    namespaces.putIfAbsent(ident.namespace.toList, Map())
+    table
+  }
+
+  override def createTable(ident: Identifier, tableInfo: TableInfo): Table = {
+    createTable(ident, tableInfo.columns(), tableInfo.partitions(), tableInfo.properties)
+  }
+}
+
+class InMemoryTableWithJoinAndSampleCatalog extends InMemoryTableCatalog {
+  import CatalogV2Implicits._
+
+  override def createTable(
+      ident: Identifier,
+      columns: Array[Column],
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    if (tables.containsKey(ident)) {
+      throw new TableAlreadyExistsException(ident.asMultipartIdentifier)
+    }
+
+    InMemoryTableCatalog.maybeSimulateFailedTableCreation(properties)
+
+    val tableName = s"$name.${ident.quoted}"
+    val table = new InMemoryTableWithJoinAndSample(tableName, columns, partitions, properties)
+    tables.put(ident, table)
+    namespaces.putIfAbsent(ident.namespace.toList, Map())
+    table
+  }
+
+  override def createTable(ident: Identifier, tableInfo: TableInfo): Table = {
+    createTable(ident, tableInfo.columns(), tableInfo.partitions(), tableInfo.properties)
+  }
+}
+
+class InMemoryTableWithLegacyTableSampleCatalog extends InMemoryTableCatalog {
+  import CatalogV2Implicits._
+
+  override def createTable(
+      ident: Identifier,
+      columns: Array[Column],
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    if (tables.containsKey(ident)) {
+      throw new TableAlreadyExistsException(ident.asMultipartIdentifier)
+    }
+
+    InMemoryTableCatalog.maybeSimulateFailedTableCreation(properties)
+
+    val tableName = s"$name.${ident.quoted}"
+    val table = new InMemoryTableWithLegacyTableSample(
+      tableName, columns, partitions, properties)
+    tables.put(ident, table)
+    namespaces.putIfAbsent(ident.namespace.toList, Map())
+    table
+  }
+
+  override def createTable(ident: Identifier, tableInfo: TableInfo): Table = {
+    createTable(ident, tableInfo.columns(), tableInfo.partitions(), tableInfo.properties)
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala
index f2827faf59435..e9d73d0f9fe1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala
@@ -140,7 +140,8 @@ class InMemoryTableWithV2Filter(
 
   private class Overwrite(predicates: Array[Predicate]) extends TestBatchWrite {
     import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper
-    override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
+    override protected def doCommit(
+        messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       val deleteKeys = InMemoryTableWithV2Filter.filtersToKeys(
         dataMap.keys, partCols.map(_.toSeq.quoted).toImmutableArraySeq, predicates)
       dataMap --= deleteKeys
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/MixedColumnIdTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/MixedColumnIdTableCatalog.scala
new file mode 100644
index 0000000000000..f77cad3c077dc
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/MixedColumnIdTableCatalog.scala
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.connector.catalog.constraints.Constraint
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.internal.connector.ColumnImpl
+
+/**
+ * An [[InMemoryTableCatalog]] that selectively strips column IDs based on
+ * column names listed in [[MixedColumnIdTableCatalog.nullIdColumnNames]].
+ * This simulates connectors that support column IDs for some columns but
+ * not others, or that transition between supporting and not supporting
+ * column IDs over time.
+ *
+ * Tests manipulate [[MixedColumnIdTableCatalog.nullIdColumnNames]] between
+ * operations to control which columns have null IDs at any given point.
+ * The set is snapshotted each time a table is created, altered, or copied.
+ * Therefore, changes to the set after that point do not affect existing table instances.
+ */
+class MixedColumnIdTableCatalog extends InMemoryTableCatalog {
+
+  private def toMixedIdTable(table: InMemoryTable): MixedColumnIdInMemoryTable = {
+    val snapshot = MixedColumnIdTableCatalog.nullIdColumnNames.toSet
+    val mixedTable = new MixedColumnIdInMemoryTable(
+      name = table.name,
+      columns = table.columns(),
+      partitioning = table.partitioning,
+      properties = table.properties,
+      constraints = table.constraints,
+      id = table.id,
+      nullIdNames = snapshot)
+    mixedTable.alterTableWithData(table.data, table.schema)
+    mixedTable.setVersionAndValidatedVersionFrom(table)
+    mixedTable
+  }
+
+  override def createTable(
+      ident: Identifier,
+      info: TableInfo): Table = {
+    val table = super.createTable(ident, info).asInstanceOf[InMemoryTable]
+    val mixedTable = toMixedIdTable(table)
+    tables.put(ident, mixedTable)
+    mixedTable
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    val table = super.alterTable(ident, changes: _*).asInstanceOf[InMemoryTable]
+    val mixedTable = toMixedIdTable(table)
+    tables.put(ident, mixedTable)
+    mixedTable
+  }
+}
+
+object MixedColumnIdTableCatalog {
+  /** Column names (lowercase) whose IDs should be nullified. */
+  val nullIdColumnNames: mutable.Set[String] = mutable.Set.empty
+
+  def reset(): Unit = nullIdColumnNames.clear()
+}
+
+/**
+ * An [[InMemoryTable]] that selectively strips column IDs for columns
+ * whose names appear in the snapshotted [[nullIdNames]] set.
+ */
+class MixedColumnIdInMemoryTable(
+    name: String,
+    columns: Array[Column],
+    partitioning: Array[Transform],
+    properties: java.util.Map[String, String],
+    constraints: Array[Constraint] = Array.empty,
+    override val id: String = java.util.UUID.randomUUID().toString,
+    nullIdNames: Set[String] = Set.empty)
+  extends InMemoryTable(
+    name = name,
+    columns = columns,
+    partitioning = partitioning,
+    properties = properties,
+    constraints = constraints,
+    id = id) {
+
+  override def columns(): Array[Column] = {
+    super.columns().map { col =>
+      val impl = col.asInstanceOf[ColumnImpl]
+      if (nullIdNames.contains(impl.name.toLowerCase(Locale.ROOT))) {
+        impl.copy(id = null)
+      } else {
+        impl
+      }
+    }
+  }
+
+  override def copy(): Table = {
+    val copiedTable = new MixedColumnIdInMemoryTable(
+      name,
+      columns(),
+      partitioning,
+      properties,
+      constraints,
+      id,
+      nullIdNames)
+    dataMap.synchronized {
+      copiedTable.alterTableWithData(data, schema)
+    }
+    copiedTable.setVersionAndValidatedVersionFrom(this)
+    copiedTable
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullColumnIdInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullColumnIdInMemoryTableCatalog.scala
new file mode 100644
index 0000000000000..c26ce263c1f8b
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullColumnIdInMemoryTableCatalog.scala
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import org.apache.spark.sql.connector.catalog.constraints.Constraint
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.internal.connector.ColumnImpl
+
+/**
+ * An [[InMemoryTableCatalog]] that strips column IDs from all columns
+ * ([[Column.id]] returns null). This simulates connectors that do not
+ * support column identity tracking.
+ *
+ * Tables are stored as [[NullColumnIdInMemoryTable]] instances that
+ * override [[columns]] to strip IDs. Data is copied from the table
+ * created by the parent [[InMemoryTableCatalog]].
+ *
+ * When column IDs are null, [[V2TableUtil.validateColumnIds]]
+ * skips validation entirely, meaning drop/re-add of a column is NOT
+ * detected via column IDs.
+ */
+class NullColumnIdInMemoryTableCatalog extends InMemoryTableCatalog {
+
+  private def toNullColumnIdTable(table: InMemoryTable): NullColumnIdInMemoryTable = {
+    val nullColIdTable = new NullColumnIdInMemoryTable(
+      name = table.name,
+      columns = table.columns(),
+      partitioning = table.partitioning,
+      properties = table.properties,
+      constraints = table.constraints,
+      id = table.id)
+    nullColIdTable.alterTableWithData(table.data, table.schema)
+    nullColIdTable.setVersionAndValidatedVersionFrom(table)
+    nullColIdTable
+  }
+
+  override def createTable(
+      ident: Identifier,
+      info: TableInfo): Table = {
+    val table = super.createTable(ident, info).asInstanceOf[InMemoryTable]
+    val nullColIdTable = toNullColumnIdTable(table)
+    tables.put(ident, nullColIdTable)
+    nullColIdTable
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    val table = super.alterTable(ident, changes: _*).asInstanceOf[InMemoryTable]
+    val nullColIdTable = toNullColumnIdTable(table)
+    tables.put(ident, nullColIdTable)
+    nullColIdTable
+  }
+}
+
+/**
+ * An [[InMemoryTable]] whose [[columns]] method always returns null
+ * column IDs, simulating a connector that does not support column
+ * identity tracking.
+ */
+class NullColumnIdInMemoryTable(
+    name: String,
+    columns: Array[Column],
+    partitioning: Array[Transform],
+    properties: java.util.Map[String, String],
+    constraints: Array[Constraint] = Array.empty,
+    override val id: String = java.util.UUID.randomUUID().toString)
+  extends InMemoryTable(
+    name = name,
+    columns = columns,
+    partitioning = partitioning,
+    properties = properties,
+    constraints = constraints,
+    id = id) {
+
+  override def columns(): Array[Column] = {
+    super.columns().map(_.asInstanceOf[ColumnImpl].copy(id = null))
+  }
+
+  override def copy(): Table = {
+    val copiedTable = new NullColumnIdInMemoryTable(
+      name,
+      columns(),
+      partitioning,
+      properties,
+      constraints,
+      id)
+    dataMap.synchronized {
+      copiedTable.alterTableWithData(data, schema)
+    }
+    copiedTable.setVersionAndValidatedVersionFrom(this)
+    copiedTable
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullTableIdAndNullColumnIdInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullTableIdAndNullColumnIdInMemoryTableCatalog.scala
new file mode 100644
index 0000000000000..df7964f63b855
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullTableIdAndNullColumnIdInMemoryTableCatalog.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import org.apache.spark.sql.connector.catalog.constraints.Constraint
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.internal.connector.ColumnImpl
+
+/**
+ * An [[InMemoryTableCatalog]] that strips both table IDs ([[Table.id]]
+ * returns null) and column IDs ([[Column.id]] returns null). This simulates
+ * connectors that support neither table nor column identity tracking.
+ *
+ * When both IDs are null, neither the table identity check in [[V2TableRefreshUtil]]
+ * nor [[V2TableUtil.validateColumnIds]] fires, so drop/recreate of a table or
+ * drop/re-add of a column goes undetected.
+ */
+class NullTableIdAndNullColumnIdInMemoryTableCatalog extends InMemoryTableCatalog {
+
+  private def toNullIdsTable(
+      table: InMemoryTable): NullTableIdAndNullColumnIdInMemoryTable = {
+    val nullTable = new NullTableIdAndNullColumnIdInMemoryTable(
+      name = table.name,
+      columns = table.columns(),
+      partitioning = table.partitioning,
+      properties = table.properties,
+      constraints = table.constraints)
+    nullTable.alterTableWithData(table.data, table.schema)
+    nullTable.setVersionAndValidatedVersionFrom(table)
+    nullTable
+  }
+
+  override def createTable(
+      ident: Identifier,
+      info: TableInfo): Table = {
+    val table = super.createTable(ident, info).asInstanceOf[InMemoryTable]
+    val nullTable = toNullIdsTable(table)
+    tables.put(ident, nullTable)
+    nullTable
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    val table = super.alterTable(ident, changes: _*).asInstanceOf[InMemoryTable]
+    val nullTable = toNullIdsTable(table)
+    tables.put(ident, nullTable)
+    nullTable
+  }
+}
+
+/**
+ * An [[InMemoryTable]] with both null table ID and null column IDs,
+ * simulating a connector that supports neither identity tracking mechanism.
+ */
+class NullTableIdAndNullColumnIdInMemoryTable(
+    name: String,
+    columns: Array[Column],
+    partitioning: Array[Transform],
+    properties: java.util.Map[String, String],
+    constraints: Array[Constraint] = Array.empty)
+  extends InMemoryTable(
+    name = name,
+    columns = columns,
+    partitioning = partitioning,
+    properties = properties,
+    constraints = constraints,
+    id = null) {
+
+  override def columns(): Array[Column] = {
+    super.columns().map(_.asInstanceOf[ColumnImpl].copy(id = null))
+  }
+
+  override def copy(): Table = {
+    val copiedTable = new NullTableIdAndNullColumnIdInMemoryTable(
+      name,
+      columns(),
+      partitioning,
+      properties,
+      constraints)
+    dataMap.synchronized {
+      copiedTable.alterTableWithData(data, schema)
+    }
+    copiedTable.setVersionAndValidatedVersionFrom(this)
+    copiedTable
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullTableIdInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullTableIdInMemoryTableCatalog.scala
new file mode 100644
index 0000000000000..391eb619535f5
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/NullTableIdInMemoryTableCatalog.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+/**
+ * An [[InMemoryTableCatalog]] that creates tables WITHOUT table IDs
+ * ([[Table.id]] returns null). This simulates connectors that do not
+ * support table identity tracking.
+ *
+ * When table ID is null, the [[validateTableIdentity]] check in
+ * [[V2TableRefreshUtil]] is skipped entirely, meaning drop/recreate
+ * of a table is NOT detected via table ID.
+ *
+ * This is to test the scenario where connectors do not implement
+ * table IDs but do implement column IDs. In this scenario, column
+ * IDs assigned by [[InMemoryBaseTable]] still differ after recreate,
+ * so [[V2TableUtil.validateColumnIds]] catches the schema change.
+ */
+class NullTableIdInMemoryTableCatalog extends InMemoryTableCatalog {
+
+  override def createTable(
+      ident: Identifier,
+      info: TableInfo): Table = {
+    val table = super.createTable(ident, info).asInstanceOf[InMemoryTable]
+    val nullIdTable = new InMemoryTable(
+      table.name,
+      table.columns(),
+      table.partitioning,
+      table.properties,
+      table.constraints,
+      id = null)
+    tables.put(ident, nullIdTable)
+    nullIdTable
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SharedInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SharedInMemoryTableCatalog.scala
new file mode 100644
index 0000000000000..502ee9868ad39
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SharedInMemoryTableCatalog.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+import java.util.concurrent.ConcurrentHashMap
+
+/**
+ * A [[NullTableIdInMemoryTableCatalog]] that shares table state across
+ * all instances. This allows multiple [[SparkSession]]s to read and
+ * write the same tables, simulating a real shared metastore.
+ *
+ * Table IDs are null (inherited from [[NullTableIdInMemoryTableCatalog]]),
+ * so cross-session drop+recreate is detected via column IDs rather
+ * than table IDs.
+ */
+class SharedInMemoryTableCatalog extends NullTableIdInMemoryTableCatalog {
+  tables = SharedInMemoryTableCatalog.sharedTables
+  override protected val namespaces: util.Map[List[String], Map[String, String]] =
+    SharedInMemoryTableCatalog.sharedNamespaces
+}
+
+object SharedInMemoryTableCatalog {
+  val sharedTables = new ConcurrentHashMap[Identifier, Table]()
+  val sharedNamespaces = new ConcurrentHashMap[List[String], Map[String, String]]()
+
+  def reset(): Unit = {
+    sharedTables.clear()
+    sharedNamespaces.clear()
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TypeChangeResetsColIdTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TypeChangeResetsColIdTableCatalog.scala
new file mode 100644
index 0000000000000..d68f2e62b1365
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TypeChangeResetsColIdTableCatalog.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.Locale
+
+import org.apache.spark.sql.internal.connector.ColumnImpl
+
+/**
+ * An [[InMemoryTableCatalog]] that assigns fresh column IDs when the
+ * column's data type changes. This is the inverse of the default
+ * [[InMemoryBaseTable.assignMissingIds]] behavior, which preserves IDs
+ * across type changes.
+ *
+ * Use this catalog for tests that need a type change to produce a new
+ * column ID (e.g., verifying that adding a nested field to a container
+ * type triggers a column ID mismatch).
+ */
+class TypeChangeResetsColIdTableCatalog extends InMemoryTableCatalog {
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    val oldColumns = loadTable(ident).columns()
+    val alteredTable = super.alterTable(ident, changes: _*).asInstanceOf[InMemoryTable]
+
+    // The parent alterTable preserves column IDs across type changes.
+    // Assign fresh IDs for columns whose type changed, so the column ID
+    // check detects the type change instead of the data columns validation.
+    val oldColsByName = oldColumns
+      .filter(_.id() != null)
+      .map(oldCol => oldCol.name().toLowerCase(Locale.ROOT) -> oldCol)
+      .toMap
+    val newColsWithResetIds: Array[Column] = alteredTable.columns().map { newCol =>
+      val key = newCol.name().toLowerCase(Locale.ROOT)
+      oldColsByName.get(key) match {
+        case Some(oldCol) if oldCol.dataType() != newCol.dataType() =>
+          newCol.asInstanceOf[ColumnImpl].copy(
+            id = InMemoryBaseTable.nextColumnId().toString): Column
+        case _ =>
+          newCol
+      }
+    }
+
+    val tableWithResetIds = new InMemoryTable(
+      alteredTable.name,
+      newColsWithResetIds,
+      alteredTable.partitioning,
+      alteredTable.properties,
+      alteredTable.constraints,
+      id = alteredTable.id)
+    tableWithResetIds.alterTableWithData(alteredTable.data, alteredTable.schema)
+    tableWithResetIds.setVersionAndValidatedVersionFrom(alteredTable)
+    tables.put(ident, tableWithResetIds)
+    tableWithResetIds
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
index 77167d9a56570..c02c517ff546b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/V2TableUtilSuite.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, MetadataAttribute}
 import org.apache.spark.sql.connector.catalog.TableCapability.BATCH_READ
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+import org.apache.spark.sql.internal.connector.ColumnImpl
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.sql.util.SchemaValidationMode.{ALLOW_NEW_TOP_LEVEL_FIELDS, PROHIBIT_CHANGES}
@@ -629,6 +630,23 @@ class V2TableUtilSuite extends SparkFunSuite {
     assert(errors.head.contains("`metadata`.`value`.`timestamp` BIGINT has been added"))
   }
 
+  test("validateColumnIds - multiple errors") {
+    val originalCols = Seq(
+      colWithId("salary", IntegerType, nullable = true, id = "id-1"),
+      colWithId("bonus", IntegerType, nullable = true, id = "id-2"))
+    val currentCols = Array(
+      colWithId("salary", IntegerType, nullable = true, id = "id-100"),
+      colWithId("bonus", IntegerType, nullable = true, id = "id-200"))
+    val table = TestTableWithMetadataSupport("test", currentCols)
+
+    val errors = V2TableUtil.validateColumnIds(
+      table = table,
+      originalCapturedCols = originalCols)
+    assert(errors == Seq(
+      "`salary` column ID has changed from id-1 to id-100",
+      "`bonus` column ID has changed from id-2 to id-200"))
+  }
+
   // simple table without metadata column support
   private case class TestTable(
       override val name: String,
@@ -677,6 +695,23 @@ class V2TableUtilSuite extends SparkFunSuite {
     Column.create(name, dataType, nullable)
   }
 
+  private def colWithId(
+      name: String,
+      dataType: DataType,
+      nullable: Boolean,
+      id: String): Column = {
+    ColumnImpl(
+      name = name,
+      dataType = dataType,
+      nullable = nullable,
+      comment = null,
+      defaultValue = null,
+      generationExpression = null,
+      identityColumnSpec = null,
+      metadataInJSON = null,
+      id = id)
+  }
+
   private def metaCol(
       name: String,
       dataType: DataType,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/ValidatingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/ValidatingInMemoryTableCatalog.scala
new file mode 100644
index 0000000000000..820f51a2af452
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/ValidatingInMemoryTableCatalog.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+
+/**
+ * A test catalog whose `createNamespace` validates the request before checking existence, so a
+ * pre-existing namespace surfaces a non-`NamespaceAlreadyExistsException` error. Mirrors the
+ * authorize-then-execute ordering of catalogs like Unity Catalog and is used to exercise the
+ * `IF NOT EXISTS` recovery path in `CreateNamespaceExec`.
+ */
+class ValidatingInMemoryTableCatalog extends InMemoryTableCatalog {
+  override def createNamespace(
+      namespace: Array[String],
+      metadata: util.Map[String, String]): Unit = {
+    if (namespaceExists(namespace)) {
+      throw new RuntimeException(
+        s"simulated validation failure on pre-existing namespace ${namespace.mkString(".")}")
+    }
+    super.createNamespace(namespace, metadata)
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/txns.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/txns.scala
new file mode 100644
index 0000000000000..203aed450a5f7
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/txns.scala
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+import java.util.concurrent.ConcurrentHashMap
+
+import scala.collection.mutable.ArrayBuffer
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
+import org.apache.spark.sql.connector.read.Scan
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, RowLevelOperationBuilder, RowLevelOperationInfo, WriteBuilder}
+import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+sealed trait TransactionState
+case object Active extends TransactionState
+case object Committed extends TransactionState
+case object Aborted extends TransactionState
+
+class Txn(override val catalog: TxnTableCatalog) extends Transaction {
+
+  private[this] var state: TransactionState = Active
+  private[this] var closed: Boolean = false
+
+  // Records every batch of scans the connector accepted via registerScans. Tests assert on this
+  // to confirm cache substitution went through the txn path.
+  val registeredScans: ArrayBuffer[Seq[Scan]] = ArrayBuffer.empty
+
+  // Test-only switch. When true, `registerScans` unconditionally returns false, simulating
+  // a connector that rejects all cache reuse. Used to verify Spark's cache-bypass behavior.
+  var rejectRegisteredScansAttempt: Boolean = false
+
+  def currentState: TransactionState = state
+
+  def isClosed: Boolean = closed
+
+  // Accept the batch only if every scan can be routed to a TxnTable this catalog is tracking;
+  // otherwise refuse. Staleness is handled upstream: `loadTable` returns a snapshot pinned at
+  // load, and version-aware `Table.equals` rejects cache matches whose underlying table has
+  // moved on.
+  override def registerScans(scans: Array[Scan]): Boolean = {
+    if (rejectRegisteredScansAttempt) return false
+
+    val routed = scans.toSeq.map {
+      case s: InMemoryBaseTable#InMemoryBatchScan =>
+        catalog.txnTables.values.find(_.delegate == s.table).map(_ -> s)
+      case _ => None
+    }
+    if (routed.exists(_.isEmpty)) return false
+
+    registeredScans += scans.toSeq
+    routed.flatten.foreach { case (txnTable, s) =>
+      txnTable.scanEvents += s.pushedFilters
+    }
+    true
+  }
+
+  override def commit(): Unit = {
+    if (closed) throw new IllegalStateException("Can't commit, already closed")
+    if (state == Aborted) throw new IllegalStateException("Can't commit, already aborted")
+    catalog.commit()
+    this.state = Committed
+  }
+
+  // This is idempotent since nested QEs can cause multiple aborts.
+  override def abort(): Unit = {
+    if (state == Committed || state == Aborted) return
+    this.state = Aborted
+  }
+
+  // This is idempotent since nested QEs can cause multiple aborts.
+  override def close(): Unit = {
+    if (!closed) {
+      catalog.clearActiveTransaction()
+      this.closed = true
+    }
+  }
+}
+
+// A special table used in row-level operation transactions. It inherits data
+// from the base table upon construction and propagates staged transaction state
+// back after an explicit commit.
+// Note, the in-memory data store does not handle concurrency at the moment. The assumes that the
+// underlying delegate table cannot change from concurrent transactions. Data sources need to
+// implement isolation semantics and make sure they are enforced.
+class TxnTable(
+    val delegate: InMemoryRowLevelOperationTable,
+    schema: StructType,
+    catalog: TxnTableCatalog)
+  extends InMemoryRowLevelOperationTable(
+    delegate.name,
+    schema,
+    delegate.partitioning,
+    delegate.properties,
+    delegate.constraints) {
+
+  // Expose the same id as the delegate so that identity checks during transaction re-resolution
+  // don't false-positive on the TxnTable wrapper having a different UUID.
+  override val id: String = delegate.id
+
+  // The starting version should be the delegate version.
+  setVersion(delegate.version())
+
+  // Preserve column IDs from the delegate so that column ID validation can correctly detect
+  // drop-and-re-add scenarios (different IDs) and pass when columns are unchanged (same IDs).
+  // Uses assignMissingIds to keep the delegate's IDs for existing columns while assigning
+  // fresh IDs for any new columns added by schema evolution.
+  updateColumns(InMemoryBaseTable.assignMissingIds(
+    oldColumns = delegate.columns(),
+    newColumns = columns()))
+
+  alterTableWithData(delegate.data, schema)
+
+  // A tracker of filters used in each scan.
+  val scanEvents = new ArrayBuffer[Array[Filter]]()
+
+  // Record scan events. This is invoked when building a scan for the particular table.
+  override protected def recordScanEvent(filters: Array[Filter]): Unit = {
+    scanEvents += filters
+  }
+
+  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+    catalog.writeTarget = this
+    super.newWriteBuilder(info)
+  }
+
+  override def newRowLevelOperationBuilder(
+      info: RowLevelOperationInfo): RowLevelOperationBuilder = {
+    catalog.writeTarget = this
+    super.newRowLevelOperationBuilder(info)
+  }
+
+  override def deleteWhere(filters: Array[Filter]): Unit = {
+    catalog.writeTarget = this
+    super.deleteWhere(filters)
+  }
+
+  // Propagates staged data and metadata changes to the delegate table.
+  def commit(): Unit = {
+    delegate.dataMap.clear()
+    delegate.updateColumns(columns()) // Evolve schema if needed.
+    delegate.alterTableWithData(data, schema)
+    delegate.replacedPartitions = replacedPartitions
+    delegate.lastWriteInfo = lastWriteInfo
+    delegate.lastWriteLog = lastWriteLog
+    delegate.commits ++= commits
+    delegate.increaseVersion()
+  }
+}
+
+// A special table catalog used in row-level operation transactions. The lifecycle of this catalog
+// is tied to the transaction. A new catalog instance is created at the beginning of a transaction
+// and discarded at the end. The catalog is responsible for pinning all tables involved in the
+// transaction. Table changes are initially staged in memory and propagated only after an explicit
+// commit.
+class TxnTableCatalog(delegate: InMemoryRowLevelOperationTableCatalog) extends TableCatalog {
+
+  private val tables: util.Map[Identifier, TxnTable] = new ConcurrentHashMap[Identifier, TxnTable]()
+
+  var writeTarget: TxnTable = _
+
+  override def name: String = delegate.name
+
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {}
+
+  override def listTables(namespace: Array[String]): Array[Identifier] = {
+    throw new UnsupportedOperationException()
+  }
+
+  // This is where the table pinning logic should occur. In this implementation, a tables is loaded
+  // (pinned) the first time is accessed. All subsequent accesses should return the same pinned
+  // table.
+  override def loadTable(ident: Identifier): Table = {
+    tables.computeIfAbsent(ident, _ => {
+      // Wrap the live underlying instance (not a snapshot copy from loadTable) so commits
+      // propagate back to the catalog's authoritative state.
+      val table = delegate.liveTable(ident).asInstanceOf[InMemoryRowLevelOperationTable]
+      new TxnTable(table, table.schema(), this)
+    })
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    // AlterTable may be called by ResolveSchemaEvolution when schema evolution is enabled. Thus,
+    // it needs to be transactional. The schema changes are only propagated to the delegate at
+    // commit time.
+    //
+    // We delegate schema computation to the underlying catalog so that catalogs with special
+    // handling (e.g. PartialSchemaEvolutionCatalog) have the same behaviour inside a
+    // transaction.
+    val txnTable = tables.get(ident)
+    val schema = delegate.computeAlterTableSchema(txnTable.schema, changes.toSeq)
+
+    if (schema.fields.isEmpty) {
+      throw new IllegalArgumentException(s"Cannot drop all fields")
+    }
+
+    val newTxnTable = new TxnTable(txnTable.delegate, schema, this)
+    newTxnTable.scanEvents ++= txnTable.scanEvents
+    tables.put(ident, newTxnTable)
+    newTxnTable
+  }
+
+  // TODO: Currently not transactional. Should be revised when Atomic CTAS/RTAS is implemented.
+  override def createTable(ident: Identifier, tableInfo: TableInfo): Table = {
+    delegate.createTable(ident, tableInfo)
+    loadTable(ident)
+  }
+
+  // TODO: Currently not transactional. Should be revised when Atomic CTAS/RTAS is implemented.
+  override def dropTable(ident: Identifier): Boolean = {
+    tables.remove(ident)
+    delegate.dropTable(ident)
+  }
+
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    throw new UnsupportedOperationException()
+  }
+
+  // Returns all tables that participated in this transaction, keyed by identifier.
+  def txnTables: scala.collection.Map[Identifier, TxnTable] = tables.asScala
+
+  // Commit the write target table, propagating staged changes to the delegate.
+  def commit(): Unit = {
+    if (writeTarget != null) writeTarget.commit()
+  }
+
+  // Clear transaction context.
+  def clearActiveTransaction(): Unit = {
+    val txn = delegate.transaction
+    delegate.lastTransaction = txn
+    delegate.observedTransactions += txn
+    delegate.transaction = null
+  }
+
+  override def equals(obj: Any): Boolean = {
+    obj match {
+      case that: CatalogPlugin => this.name == that.name
+      case _ => false
+    }
+  }
+
+  override def hashCode(): Int = name.hashCode()
+}
+
+/**
+ * An InMemoryRowLevelOperationTableCatalog that utilizes tables backed by a shared map. This
+ * simulates the behavior of real catalogs (Delta, Iceberg, etc.) where multiple instances
+ * of the catalog share the same underlying persistent storage, thus, they see the same tables.
+ *
+ * This is needed for testing execution that spans multiple Spark sessions. In particular,
+ * streaming queries execute micro-batches in cloned Spark sessions. Without this, the cloned
+ * spark session catalog will not see any tables created in the original session.
+ *
+ * Tests that use this catalog must call
+ * [[SharedTablesInMemoryRowLevelOperationTableCatalog.reset()]] in `afterEach` to clear the
+ * shared state between test cases.
+ */
+class SharedTablesInMemoryRowLevelOperationTableCatalog
+    extends InMemoryRowLevelOperationTableCatalog {
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    super.initialize(name, options)
+    tables = SharedTablesInMemoryRowLevelOperationTableCatalog.sharedTables
+  }
+
+  // Return the live table instance (not a snapshot copy) so that an in-place TRUNCATE --
+  // which resolves its target via the read-path loadTable -- mutates the shared catalog
+  // state instead of a discarded copy. (DROP bypasses loadTable, so it is unaffected.)
+  override def loadTable(ident: Identifier): Table = liveTable(ident)
+}
+
+object SharedTablesInMemoryRowLevelOperationTableCatalog {
+  private[catalog] val sharedTables: ConcurrentHashMap[Identifier, Table] =
+    new ConcurrentHashMap[Identifier, Table]()
+
+  def reset(): Unit = sharedTables.clear()
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2RelationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2RelationSuite.scala
index 10ec9efca4aba..a1f1c85fe4d5a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2RelationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2RelationSuite.scala
@@ -43,7 +43,7 @@ class DataSourceV2RelationSuite extends SparkFunSuite {
       rowCount = Some(10),
       colStats = Map(
         "id" -> idColStat,
-        // "extra" is not in schema — should be silently skipped
+        // "extra" is not in schema, should be silently skipped
         "extra" -> CatalogColumnStat(distinctCount = Some(5))))
 
     val v2Stats = DataSourceV2Relation.v1StatsToV2Stats(catalogStats, schema)
diff --git a/sql/connect/bin/spark-connect-scala-client b/sql/connect/bin/spark-connect-scala-client
index 019a42a2ba473..5748a7ec125f0 100755
--- a/sql/connect/bin/spark-connect-scala-client
+++ b/sql/connect/bin/spark-connect-scala-client
@@ -72,7 +72,7 @@ JVM_ARGS="-XX:+IgnoreUnrecognizedVMOptions \
   -Dio.netty.tryReflectionSetAccessible=true \
   -Dio.netty.allocator.type=pooled \
   -Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE \
-  -Dio.netty.noUnsafe=false \
+  --sun-misc-unsafe-memory-access=allow \
   --enable-native-access=ALL-UNNAMED \
   $SCJVM_ARGS"
 
diff --git a/sql/connect/client/jdbc/pom.xml b/sql/connect/client/jdbc/pom.xml
index 26c22f5669361..20c921d661ec4 100644
--- a/sql/connect/client/jdbc/pom.xml
+++ b/sql/connect/client/jdbc/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
index a0b4711c2747b..1f525a541daae 100644
--- a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
+++ b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
@@ -209,8 +209,8 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
     withConnection { conn =>
       val metadata = conn.getMetaData
       // scalastyle:off line.size.limit
-      // CURRENT_PATH is excluded: getSQLKeywords drops SQL:2003 reserved words (see companion).
-      assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,CURRENT_DATABASE,CURRENT_SCHEMA,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PATH,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE")
+      // CURRENT_PATH and SYSTEM are excluded: getSQLKeywords drops SQL:2003 reserved words (see companion).
+      assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,APPROX,ARCHIVE,ASC,BERNOULLI,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,CURRENT_DATABASE,CURRENT_SCHEMA,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTANCE,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXACT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NEAREST,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PATH,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SIMILARITY,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE")
       // scalastyle:on line.size.limit
     }
   }
diff --git a/sql/connect/client/jvm/pom.xml b/sql/connect/client/jvm/pom.xml
index 183d6a0d808ee..ba2c314d2799a 100644
--- a/sql/connect/client/jvm/pom.xml
+++ b/sql/connect/client/jvm/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala b/sql/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala
index ca82381eec9e3..e8b14951b68a0 100644
--- a/sql/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala
+++ b/sql/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala
@@ -26,6 +26,7 @@ import ammonite.compiler.iface.CodeWrapper
 import ammonite.interp.{Interpreter, Watchable}
 import ammonite.main.Defaults
 import ammonite.repl.Repl
+import ammonite.runtime.Storage
 import ammonite.util.{Bind, Imports, Name, PredefInfo, Ref, Res, Util}
 import ammonite.util.Util.newLine
 
@@ -102,9 +103,13 @@ Spark session available as 'spark'.
         |""".stripMargin
     // Please note that we make ammonite generate classes instead of objects.
     // Classes tend to have superior serialization behavior when using UDFs.
+    // SPARK-56448: Use Storage.InMemory to avoid stale compile cache across restarts.
+    // The default Storage.Folder persists compiled predef classes under ~/.ammonite. On a
+    // subsequent REPL start, the cached CodePredef references a stale ArgsPredef, causing NPE.
     val main = new ammonite.Main(
       welcomeBanner = Option(splash.format(spark_version, spark.version)),
       predefCode = predefCode,
+      storageBackend = new Storage.InMemory(),
       replCodeWrapper = ExtendedCodeClassWrapper,
       scriptCodeWrapper = ExtendedCodeClassWrapper,
       inputStream = inputStream,
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNearestByJoinSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNearestByJoinSuite.scala
new file mode 100644
index 0000000000000..00d7c4f80b09d
--- /dev/null
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNearestByJoinSuite.scala
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.connect.test.{QueryTest, RemoteSparkSession}
+import org.apache.spark.sql.functions._
+
+/**
+ * End-to-end Connect-side coverage for `Dataset.nearestByJoin`. Mirrors the
+ * `DataFrameNearestByJoinSuite` in `sql/core` for the classic path; this suite ensures the same
+ * API behaves correctly when invoked through the Connect client (proto serialization, server-side
+ * proto-to-catalyst translation in `SparkConnectPlanner.transformNearestByJoin`, and result
+ * roundtrip).
+ */
+class DataFrameNearestByJoinSuite extends QueryTest with RemoteSparkSession {
+  import testImplicits._
+
+  private lazy val users = Seq((1, 10.0), (2, 20.0), (3, 30.0)).toDF("user_id", "score")
+
+  private lazy val products = Seq(("A", 11.0), ("B", 22.0), ("C", 5.0)).toDF("product", "pscore")
+
+  test("inner approx similarity k=1") {
+    checkAnswer(
+      users
+        .nearestByJoin(
+          right = products,
+          rankingExpression = -abs(users("score") - products("pscore")),
+          numResults = 1,
+          mode = "approx",
+          direction = "similarity")
+        .select("user_id", "product")
+        .orderBy("user_id"),
+      Seq(Row(1, "A"), Row(2, "B"), Row(3, "B")))
+  }
+
+  test("inner approx distance k=2") {
+    checkAnswer(
+      users
+        .nearestByJoin(
+          right = products,
+          rankingExpression = abs(users("score") - products("pscore")),
+          numResults = 2,
+          mode = "approx",
+          direction = "distance")
+        .select("user_id", "product")
+        .orderBy("user_id", "product"),
+      Seq(Row(1, "A"), Row(1, "C"), Row(2, "A"), Row(2, "B"), Row(3, "A"), Row(3, "B")))
+  }
+
+  test("left outer with empty right preserves left rows with NULLs") {
+    val emptyProducts = products.filter(lit(false))
+    checkAnswer(
+      users
+        .nearestByJoin(
+          right = emptyProducts,
+          rankingExpression = -abs(users("score") - emptyProducts("pscore")),
+          numResults = 1,
+          mode = "exact",
+          direction = "similarity",
+          joinType = "leftouter")
+        .select("user_id", "product")
+        .orderBy("user_id"),
+      Seq(Row(1, null), Row(2, null), Row(3, null)))
+  }
+
+  test("output schema has no rewrite-internal columns") {
+    val result = users.nearestByJoin(
+      right = products,
+      rankingExpression = -abs(users("score") - products("pscore")),
+      numResults = 1,
+      mode = "exact",
+      direction = "similarity")
+    // Only the user-visible columns flow through; no `__qid`, `__nearest_matches__`, etc.
+    assert(result.columns.toSet === Set("user_id", "score", "product", "pscore"))
+  }
+
+  test("invalid mode is rejected") {
+    val ex = intercept[AnalysisException] {
+      users.nearestByJoin(
+        right = products,
+        rankingExpression = -abs(users("score") - products("pscore")),
+        numResults = 1,
+        mode = "bogus",
+        direction = "similarity")
+    }
+    assert(ex.getCondition === "NEAREST_BY_JOIN.UNSUPPORTED_MODE")
+  }
+}
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 0c1123f1a76a0..199736da92ac6 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -403,7 +403,7 @@ class PlanGenerationTestSuite extends ConnectFunSuite with Logging {
   test("read changes with options") {
     session.read
       .option("startingTimestamp", "2026-01-01")
-      .option("deduplicationMode", "netChanges")
+      .option("deduplicationMode", "dropCarryovers")
       .option("computeUpdates", "true")
       .changes("myTable")
   }
@@ -516,6 +516,29 @@ class PlanGenerationTestSuite extends ConnectFunSuite with Logging {
     left.crossJoin(right)
   }
 
+  test("nearestByJoin inner_approx_similarity") {
+    left
+      .as("l")
+      .nearestByJoin(
+        right = right.as("r"),
+        rankingExpression = fn.col("l.a") + fn.col("r.a"),
+        numResults = 1,
+        mode = "approx",
+        direction = "similarity")
+  }
+
+  test("nearestByJoin leftouter_exact_distance") {
+    left
+      .as("l")
+      .nearestByJoin(
+        right = right.as("r"),
+        rankingExpression = fn.col("l.a") + fn.col("r.a"),
+        numResults = 5,
+        mode = "exact",
+        direction = "distance",
+        joinType = "leftouter")
+  }
+
   test("sortWithinPartitions strings") {
     simple.sortWithinPartitions("a", "id")
   }
@@ -2728,6 +2751,10 @@ class PlanGenerationTestSuite extends ConnectFunSuite with Logging {
     fn.is_variant_null(fn.parse_json(fn.col("g")))
   }
 
+  functionTest("is_valid_variant") {
+    fn.is_valid_variant(fn.parse_json(fn.col("g")))
+  }
+
   functionTest("variant_get") {
     fn.variant_get(fn.parse_json(fn.col("g")), "$", "int")
   }
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/AmmoniteReplE2ESuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/AmmoniteReplE2ESuite.scala
new file mode 100644
index 0000000000000..ffa892048c776
--- /dev/null
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/AmmoniteReplE2ESuite.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.application
+
+import java.util.concurrent.TimeUnit
+
+import scala.collection.mutable.ArrayBuffer
+import scala.sys.process.BasicIO
+
+import org.apache.spark.sql.connect.test.{ConnectFunSuite, RemoteSparkSession}
+import org.apache.spark.tags.AmmoniteTest
+
+@AmmoniteTest
+class AmmoniteReplE2ESuite extends ConnectFunSuite with RemoteSparkSession {
+
+  private def runSparkShell(): (Int, String, String) = {
+    val sparkHome = sys.props.getOrElse(
+      "spark.test.home",
+      sys.env.getOrElse("SPARK_HOME", fail("spark.test.home or SPARK_HOME not set")))
+    val command = Seq(s"$sparkHome/bin/spark-shell", "--remote", s"sc://localhost:$serverPort")
+
+    val process = new ProcessBuilder(command: _*).start()
+    // Close stdin immediately so shell exits on EOF
+    process.getOutputStream.close()
+
+    val stdout = new ArrayBuffer[String]()
+    val stderr = new ArrayBuffer[String]()
+    val stdoutThread = new Thread() {
+      setDaemon(true)
+      override def run(): Unit = BasicIO.processFully(stdout += _)(process.getInputStream)
+    }
+    val stderrThread = new Thread() {
+      setDaemon(true)
+      override def run(): Unit = BasicIO.processFully(stderr += _)(process.getErrorStream)
+    }
+    stdoutThread.start()
+    stderrThread.start()
+
+    val exited = process.waitFor(60, TimeUnit.SECONDS)
+    if (!exited) {
+      process.destroyForcibly()
+      fail("spark-shell did not exit within 60 seconds")
+    }
+    stdoutThread.join(10000)
+    stderrThread.join(10000)
+    (process.exitValue(), stdout.mkString("\n"), stderr.mkString("\n"))
+  }
+
+  test("SPARK-56448: restarting spark-shell --remote does not throw NPE") {
+    // First invocation
+    val (exit1, _, stderr1) = runSparkShell()
+    assert(exit1 == 0, s"First spark-shell failed (exit=$exit1): $stderr1")
+
+    // Second invocation -- without the fix, this would NPE from stale Ammonite cache
+    val (exit2, _, stderr2) = runSparkShell()
+    assert(exit2 == 0, s"Second spark-shell failed (exit=$exit2): $stderr2")
+  }
+}
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala
index 61c4502b256d8..5237554b3625d 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala
@@ -99,12 +99,12 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
         import session.implicits._
         val df1 = Seq("Bob", "Alice", "Nico", "Bob", "Alice").toDF("name")
         df1.write.parquet(table1Dir.getPath)
-        spark.catalog.createTable(parquetTableName, table1Dir.getPath).collect()
+        spark.catalog.createTable(parquetTableName, table1Dir.getPath)
         withTable(orcTableName, jsonTableName) {
           withTempPath { table2Dir =>
             val df2 = Seq("Bob", "Alice", "Nico", "Bob", "Alice").zipWithIndex.toDF("name", "id")
             df2.write.orc(table2Dir.getPath)
-            spark.catalog.createTable(orcTableName, table2Dir.getPath, "orc").collect()
+            spark.catalog.createTable(orcTableName, table2Dir.getPath, "orc")
             val orcTable = spark.catalog.getTable(orcTableName)
             assert(!orcTable.isTemporary)
             assert(orcTable.name == orcTableName)
@@ -117,7 +117,6 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
           val schema = new StructType().add("id", LongType).add("a", DoubleType)
           spark.catalog
             .createTable(jsonTableName, "json", schema, Map.empty[String, String])
-            .collect()
           val jsonTable = spark.catalog.getTable("default", jsonTableName)
           assert(!jsonTable.isTemporary)
           assert(jsonTable.name == jsonTableName)
@@ -151,6 +150,40 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
     assert(spark.catalog.listTables().collect().isEmpty)
   }
 
+  test("createTable should be eager") {
+    val tableName = "eager_table"
+    withTable(tableName) {
+      withTempPath { dir =>
+        val session = spark
+        import session.implicits._
+        Seq((1, "a")).toDF("id", "value").write.parquet(dir.getPath)
+        spark.catalog.createTable(tableName, dir.getPath)
+        assert(spark.catalog.tableExists(tableName))
+      }
+    }
+  }
+
+  test("createTable(tableName, path) uses spark.sql.sources.default") {
+    val tableName = "default_source_table"
+    withSQLConf("spark.sql.sources.default" -> "json") {
+      withTable(tableName) {
+        withTempPath { dir =>
+          val session = spark
+          import session.implicits._
+          // Write the data as JSON. If createTable hardcoded the parquet provider, reading the
+          // table back would fail because the files are not parquet.
+          Seq((1, "a")).toDF("id", "value").write.json(dir.getPath)
+          spark.catalog.createTable(tableName, dir.getPath)
+          assert(spark.catalog.tableExists(tableName))
+          val ddl = spark.catalog.getCreateTableString(tableName)
+          assert(ddl.toLowerCase(java.util.Locale.ROOT).contains("using json"))
+          // Reading the table back succeeds only if it was created with the json provider.
+          assert(spark.table(tableName).count() == 1)
+        }
+      }
+    }
+  }
+
   test("Cache Table APIs") {
     val parquetTableName = "parquet_table"
     withTable(parquetTableName) {
@@ -159,7 +192,7 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
         import session.implicits._
         val df1 = Seq("Bob", "Alice", "Nico", "Bob", "Alice").toDF("name")
         df1.write.parquet(table1Dir.getPath)
-        spark.catalog.createTable(parquetTableName, table1Dir.getPath).collect()
+        spark.catalog.createTable(parquetTableName, table1Dir.getPath)
 
         // Test cache and uncacheTable
         spark.catalog.cacheTable(parquetTableName)
@@ -375,7 +408,7 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
         val session = spark
         import session.implicits._
         Seq(1).toDF("id").write.parquet(dir.getPath)
-        spark.catalog.createTable(tbl, dir.getPath).collect()
+        spark.catalog.createTable(tbl, dir.getPath)
         assert(spark.catalog.tableExists(tbl))
         spark.catalog.dropTable(tbl)
         assert(!spark.catalog.tableExists(tbl))
@@ -445,7 +478,7 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
         val session = spark
         import session.implicits._
         Seq(1).toDF("id").write.parquet(dir.getPath)
-        spark.catalog.createTable(t, dir.getPath).collect()
+        spark.catalog.createTable(t, dir.getPath)
         val ddl = spark.catalog.getCreateTableString(t)
         assert(ddl.nonEmpty && ddl.toLowerCase(java.util.Locale.ROOT).contains("create"))
       }
@@ -470,7 +503,7 @@ class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelpe
         val session = spark
         import session.implicits._
         Seq(1).toDF("id").write.parquet(dir.getPath)
-        spark.catalog.createTable(t, dir.getPath).collect()
+        spark.catalog.createTable(t, dir.getPath)
         spark.catalog.analyzeTable(t, noScan = true)
       }
     }
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/DataFrameSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/DataFrameSuite.scala
index 890245fdd2fba..57b8080c4b137 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/DataFrameSuite.scala
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/DataFrameSuite.scala
@@ -68,14 +68,24 @@ class DataFrameSuite extends QueryTest with RemoteSparkSession {
   }
 
   test("lazy column validation") {
-    val session = spark
-    import session.implicits._
-
-    val df1 = Seq(1 -> "y").toDF("a", "y")
-    val df2 = Seq(1 -> "x").toDF("a", "x")
-    val df3 = df1.join(df2, df1("a") === df2("a"))
-    val df4 = df3.select(df1("x")) // <- No exception here
-
-    intercept[AnalysisException] { df4.schema }
+    // The test relies on strict plan-id-based resolution: with the name-based fallback
+    // enabled, df1("x") would resolve to df2.x via the join output and df4.schema would
+    // succeed. Pin the config directly via spark.conf.set/unset; the lazy SQLConf entry
+    // trips withSQLConf's isModifiable check on the Connect server, so we cannot use that
+    // helper here.
+    spark.conf.set("spark.sql.analyzer.strictDataFrameColumnResolution", "true")
+    try {
+      val session = spark
+      import session.implicits._
+
+      val df1 = Seq(1 -> "y").toDF("a", "y")
+      val df2 = Seq(1 -> "x").toDF("a", "x")
+      val df3 = df1.join(df2, df1("a") === df2("a"))
+      val df4 = df3.select(df1("x")) // <- No exception here
+
+      intercept[AnalysisException] { df4.schema }
+    } finally {
+      spark.conf.unset("spark.sql.analyzer.strictDataFrameColumnResolution")
+    }
   }
 }
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala
new file mode 100644
index 0000000000000..88ed1f31c86ae
--- /dev/null
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connect
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connect.test.{ConnectFunSuite, RemoteSparkSession, SQLHelper}
+import org.apache.spark.sql.functions.current_path
+
+/**
+ * End-to-end coverage for the SQL Standard PATH feature over Spark Connect.
+ *
+ * SET PATH and the frozen-path semantics for persisted views / SQL functions are implemented
+ * entirely server-side, but the analyzer state (`AnalysisContext`) that carries the pinned path
+ * must survive plan reification across the gRPC boundary. These tests run the public surface over
+ * a real Connect client so regressions there are caught:
+ *   - `SET PATH = ...` is parsed and applied to the session,
+ *   - `current_path()` (SQL and the DataFrame builtin) reflects it,
+ *   - a persisted view created under one path resolves its body under the frozen path even when
+ *     the invoker switches the session path.
+ */
+class SqlPathE2ETestSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelper {
+
+  test("SET PATH and current_path() round-trip over Connect") {
+    withSQLConf("spark.sql.path.enabled" -> "true") {
+      try {
+        spark.sql("SET PATH = spark_catalog.default, system.builtin")
+        val sqlPath = spark.sql("SELECT current_path()").head().getString(0)
+        assert(
+          sqlPath == "spark_catalog.default,system.builtin",
+          s"current_path() over Connect should reflect SET PATH; got: $sqlPath")
+
+        // DataFrame builtin should agree with the SQL form.
+        val apiPath = spark.range(1).select(current_path()).head().getString(0)
+        assert(
+          apiPath == sqlPath,
+          s"functions.current_path() should match SQL current_path(); got: $apiPath vs $sqlPath")
+      } finally {
+        spark.sql("SET PATH = DEFAULT_PATH")
+      }
+    }
+  }
+
+  test("Persisted view body uses frozen path over Connect") {
+    withSQLConf("spark.sql.path.enabled" -> "true") {
+      withDatabase("connect_path_a", "connect_path_b") {
+        spark.sql("CREATE DATABASE connect_path_a")
+        spark.sql("CREATE DATABASE connect_path_b")
+        spark.sql("CREATE TABLE connect_path_a.frozen_t USING parquet AS SELECT 1 AS id")
+        spark.sql("CREATE TABLE connect_path_b.frozen_t USING parquet AS SELECT 2 AS id")
+        withView("default.v_path_connect") {
+          try {
+            // Create the view under PATH=a.
+            spark.sql("SET PATH = spark_catalog.connect_path_a, system.builtin")
+            spark.sql("CREATE VIEW default.v_path_connect AS SELECT id FROM frozen_t")
+
+            // Switch the session path to b; bare `frozen_t` now resolves through b,
+            // but the view's frozen path keeps it pinned to a.
+            spark.sql("SET PATH = spark_catalog.connect_path_b, system.builtin")
+            val bare = spark.sql("SELECT id FROM frozen_t").head().getInt(0)
+            assert(bare == 2, s"Bare `frozen_t` should follow live PATH=b; got: $bare")
+            val viaView = spark.sql("SELECT id FROM default.v_path_connect").head().getInt(0)
+            assert(
+              viaView == 1,
+              s"View body should resolve via the frozen creation-time PATH; got: $viaView")
+          } finally {
+            spark.sql("SET PATH = DEFAULT_PATH")
+          }
+        }
+      }
+    }
+  }
+
+  test("SET PATH is rejected over Connect when feature is disabled") {
+    withSQLConf("spark.sql.path.enabled" -> "false") {
+      val ex = intercept[AnalysisException] {
+        spark.sql("SET PATH = spark_catalog.default")
+      }
+      assert(
+        ex.getCondition == "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED",
+        s"Expected SET_PATH_WHEN_DISABLED, got: ${ex.getCondition}")
+    }
+  }
+}
diff --git a/sql/connect/common/pom.xml b/sql/connect/common/pom.xml
index 16bc4bb462310..870a1aed08643 100644
--- a/sql/connect/common/pom.xml
+++ b/sql/connect/common/pom.xml
@@ -22,7 +22,7 @@
     <parent>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-parent_2.13</artifactId>
-        <version>4.2.0.1-4.3.0-0</version>
+        <version>4.2.0.1-4.3.0-1</version>
         <relativePath>../../../pom.xml</relativePath>
     </parent>
 
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/commands.proto b/sql/connect/common/src/main/protobuf/spark/connect/commands.proto
index c22e76e3542f5..dcf5aff2366f7 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/commands.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/commands.proto
@@ -144,6 +144,9 @@ message WriteOperation {
   // (Optional) Columns used for clustering the table.
   repeated string clustering_columns = 10;
 
+  // (Optional) Whether schema evolution is enabled for the write.
+  bool with_schema_evolution = 11;
+
   message SaveTable {
     // (Required) The table name.
     string table_name = 1;
@@ -211,6 +214,9 @@ message WriteOperationV2 {
 
   // (Optional) Columns used for clustering the table.
   repeated string clustering_columns = 9;
+
+  // (Optional) Whether schema evolution is enabled for the write.
+  bool with_schema_evolution = 10;
 }
 
 // Starts write stream operation as streaming query. Query ID and Run ID of the streaming
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/pipelines.proto b/sql/connect/common/src/main/protobuf/spark/connect/pipelines.proto
index 6438583c9d47c..7632ec95b9b5e 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/pipelines.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/pipelines.proto
@@ -22,6 +22,7 @@ package spark.connect;
 import "google/protobuf/any.proto";
 import "google/protobuf/timestamp.proto";
 import "spark/connect/common.proto";
+import "spark/connect/expressions.proto";
 import "spark/connect/relations.proto";
 import "spark/connect/types.proto";
 
@@ -144,6 +145,7 @@ message PipelineCommand {
 
     oneof details {
       WriteRelationFlowDetails relation_flow_details = 7;
+      AutoCdcFlowDetails auto_cdc_flow_details = 10;
       google.protobuf.Any extension = 999;
     }
 
@@ -154,6 +156,46 @@ message PipelineCommand {
       optional spark.connect.Relation relation = 1;
     }
 
+    // Details for Auto CDC flows.
+    message AutoCdcFlowDetails {
+      // The name of the CDC source to stream from.
+      optional string source = 1;
+
+      // Column(s) that uniquely identify a row in source and target data.
+      repeated Expression keys = 2;
+
+      // Expression to order the source data.
+      optional Expression sequence_by = 3;
+
+      // Delete condition for the merged operation.
+      optional Expression apply_as_deletes = 6;
+
+      // Truncate condition for the merged operation.
+      optional Expression apply_as_truncates = 7;
+
+      // Columns included in the output table.
+      repeated Expression column_list = 8;
+
+      // Columns excluded from the output table.
+      repeated Expression except_column_list = 9;
+
+      // SCD Type for target table.
+      SCDType stored_as_scd_type = 10;
+
+      // Subset of columns to ignore null in updates.
+      repeated Expression ignore_null_updates_column_list = 14;
+
+      // Subset of columns excluded from ignoring null in updates.
+      repeated Expression ignore_null_updates_except_column_list = 15;
+
+    }
+
+    // SCD Type for Auto CDC target tables.
+    enum SCDType {
+      SCD_TYPE_UNSPECIFIED = 0;
+      SCD_TYPE_1 = 1;
+    }
+
     // If true, define the flow as a one-time flow, such as for backfill.
     // Set to true changes the flow in two ways:
     //   - The flow is run one time by default. If the pipeline is ran with a full refresh,
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/relations.proto b/sql/connect/common/src/main/protobuf/spark/connect/relations.proto
index 57c4ed7be3c84..95cc9281d8cad 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/relations.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/relations.proto
@@ -82,6 +82,7 @@ message Relation {
     LateralJoin lateral_join = 44;
     ChunkedCachedLocalRelation chunked_cached_local_relation = 45;
     RelationChanges relation_changes = 46;
+    NearestByJoin nearest_by_join = 47;
 
     // NA functions
     NAFill fill_na = 90;
@@ -1276,3 +1277,33 @@ message LateralJoin {
   // (Required) The join type.
   Join.JoinType join_type = 4;
 }
+
+// Relation of type [[NearestByJoin]].
+//
+// For each row on the left side, returns up to `num_results` rows from the right side ranked
+// by `ranking_expression`.
+message NearestByJoin {
+  // (Required) Left (query) input relation.
+  Relation left = 1;
+
+  // (Required) Right (base) input relation.
+  Relation right = 2;
+
+  // (Required) Scalar expression used to rank candidate rows on the right side.
+  Expression ranking_expression = 3;
+
+  // (Required) Maximum number of matches per left row. Must be between 1 and 100000.
+  int32 num_results = 4;
+
+  // The following three fields use `string` (not typed enums) for parity with `AsOfJoin`,
+  // which models analogous fields the same way. Validation happens server-side at planning time.
+
+  // (Required) The join type. Must be one of: "inner", "leftouter".
+  string join_type = 5;
+
+  // (Required) Search algorithm contract. Must be one of: "approx", "exact".
+  string mode = 6;
+
+  // (Required) Ranking direction. Must be one of: "distance", "similarity".
+  string direction = 7;
+}
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Catalog.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Catalog.scala
index ea4bc3e7ad604..ce7a10c4026c2 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Catalog.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Catalog.scala
@@ -392,13 +392,15 @@ class Catalog(sparkSession: SparkSession) extends catalog.Catalog {
    * @since 3.5.0
    */
   override def createTable(tableName: String, path: String): DataFrame = {
-    sparkSession.newDataFrame { builder =>
-      builder.getCatalogBuilder.getCreateTableBuilder
-        .setTableName(tableName)
-        .setSchema(DataTypeProtoConverter.toConnectProtoType(new StructType))
-        .setDescription("")
-        .putOptions("path", path)
-    }
+    // Leave the source unset so the server resolves spark.sql.sources.default, as documented
+    // above. Routing through createTable(tableName, path, "parquet") would hardcode the provider
+    // and ignore that configuration.
+    createTable(
+      tableName = tableName,
+      source = None,
+      schema = new StructType,
+      description = "",
+      options = Map("path" -> path))
   }
 
   /**
@@ -484,16 +486,31 @@ class Catalog(sparkSession: SparkSession) extends catalog.Catalog {
       schema: StructType,
       description: String,
       options: Map[String, String]): DataFrame = {
-    sparkSession.newDataFrame { builder =>
+    createTable(tableName, Some(source), schema, description, options)
+  }
+
+  /**
+   * Shared implementation for the public `createTable` overloads. When `source` is `None`, the
+   * proto's `source` field is left unset so the server resolves `spark.sql.sources.default`;
+   * otherwise the provided source is pinned via `setSource`.
+   */
+  private def createTable(
+      tableName: String,
+      source: Option[String],
+      schema: StructType,
+      description: String,
+      options: Map[String, String]): DataFrame = {
+    sparkSession.execute { builder =>
       val createTableBuilder = builder.getCatalogBuilder.getCreateTableBuilder
         .setTableName(tableName)
-        .setSource(source)
         .setSchema(DataTypeProtoConverter.toConnectProtoType(schema))
         .setDescription(description)
+      source.foreach(createTableBuilder.setSource)
       options.foreach { case (k, v) =>
         createTableBuilder.putOptions(k, v)
       }
     }
+    sparkSession.table(tableName)
   }
 
   /**
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala
index 2038037d4439c..c79b86bd77c66 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala
@@ -129,6 +129,8 @@ final class DataFrameWriter[T] private[sql] (ds: Dataset[T]) extends sql.DataFra
       builder.putOptions(k, v)
     }
 
+    builder.setWithSchemaEvolution(_withSchemaEvolution)
+
     ds.sparkSession.execute(proto.Command.newBuilder().setWriteOperation(builder).build())
   }
 
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala
index 06d339487bfb8..aa6d52c32dc66 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala
@@ -116,6 +116,7 @@ final class DataFrameWriterV2[T] private[sql] (table: String, ds: Dataset[T])
   }
 
   private def executeWriteOperation(mode: proto.WriteOperationV2.Mode): Unit = {
+    builder.setWithSchemaEvolution(_withSchemaEvolution)
     val command = proto.Command
       .newBuilder()
       .setWriteOperationV2(builder.setMode(mode))
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamReader.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamReader.scala
index 2f80402739954..50c0de85e0cb9 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamReader.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamReader.scala
@@ -75,16 +75,9 @@ final class DataStreamReader private[sql] (sparkSession: SparkSession)
     this
   }
 
-  /**
-   * Specifies a name for the streaming source. This name is used to identify the source in
-   * checkpoint metadata and enables stable checkpoint locations for source evolution.
-   *
-   * @param sourceName
-   *   the name to assign to this streaming source
-   * @since 4.2.0
-   */
+  /** @inheritdoc */
   @Experimental
-  private[sql] def name(sourceName: String): this.type = {
+  override def name(sourceName: String): this.type = {
     validateSourceName(sourceName)
     sourceBuilder.setSourceName(sourceName)
     this
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala
index ffa11b5d7ab0d..bac41acc83f03 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala
@@ -82,6 +82,11 @@ final class DataStreamWriter[T] private[sql] (ds: Dataset[T])
     this
   }
 
+  /** @inheritdoc */
+  private[sql] def name(sinkName: String): this.type = {
+    throw new UnsupportedOperationException("Sink naming is not supported in Spark Connect")
+  }
+
   /** @inheritdoc */
   def format(source: String): this.type = {
     sinkBuilder.setFormat(source)
diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
index b57ea66bb1f7d..34c685213711c 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala
@@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders._
 import org.apache.spark.sql.catalyst.expressions.OrderUtils
+import org.apache.spark.sql.catalyst.plans.NearestByJoinValidation
 import org.apache.spark.sql.connect.ColumnNodeToProtoConverter.{toExpr, toLiteral, toTypedExpr}
 import org.apache.spark.sql.connect.ConnectConversions._
 import org.apache.spark.sql.connect.client.SparkResult
@@ -421,6 +422,52 @@ class Dataset[T] private[sql] (
     lateralJoin(right, Some(joinExprs), joinType)
   }
 
+  private def nearestByJoinImpl(
+      right: sql.Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      joinType: String,
+      mode: String,
+      direction: String): DataFrame = {
+    // Validate locally so Connect users see the same errors as the classic path without a
+    // server round-trip. The validation logic mirrors `NearestByJoinType.apply` /
+    // `NearestByJoinMode.apply` / `NearestByDirection.apply` in sql/catalyst, which
+    // `sql/connect/common` cannot import; the acceptance lists themselves are shared via
+    // `NearestByJoinValidation` in sql-api.
+    Dataset.validateNearestByJoinArgs(numResults, joinType, mode, direction)
+    sparkSession.newDataFrame(Seq(rankingExpression)) { builder =>
+      builder.getNearestByJoinBuilder
+        .setLeft(plan.getRoot)
+        .setRight(right.plan.getRoot)
+        .setRankingExpression(toExpr(rankingExpression))
+        .setNumResults(numResults)
+        .setJoinType(joinType)
+        .setMode(mode)
+        .setDirection(direction)
+    }
+  }
+
+  /** @inheritdoc */
+  def nearestByJoin(
+      right: sql.Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      mode: String,
+      direction: String): DataFrame = {
+    nearestByJoinImpl(right, rankingExpression, numResults, "inner", mode, direction)
+  }
+
+  /** @inheritdoc */
+  def nearestByJoin(
+      right: sql.Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      mode: String,
+      direction: String,
+      joinType: String): DataFrame = {
+    nearestByJoinImpl(right, rankingExpression, numResults, joinType, mode, direction)
+  }
+
   override protected def sortInternal(global: Boolean, sortCols: Seq[Column]): Dataset[T] = {
     val sortExprs = sortCols.map { c =>
       ColumnNodeToProtoConverter(c.sortOrder).getSortOrder
@@ -1569,3 +1616,47 @@ class Dataset[T] private[sql] (
   override def queryExecution: QueryExecution =
     throw ConnectClientUnsupportedErrors.queryExecution()
 }
+
+private[sql] object Dataset {
+
+  private[connect] def validateNearestByJoinArgs(
+      numResults: Int,
+      joinType: String,
+      mode: String,
+      direction: String): Unit = {
+    if (numResults < 1 || numResults > NearestByJoinValidation.MaxNumResults) {
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+        messageParameters = Map(
+          "numResults" -> numResults.toString,
+          "min" -> "1",
+          "max" -> NearestByJoinValidation.MaxNumResults.toString))
+    }
+    val canonicalJoinType = joinType.toLowerCase(java.util.Locale.ROOT).replace("_", "")
+    if (!NearestByJoinValidation.SupportedJoinTypes.contains(canonicalJoinType)) {
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+        messageParameters = Map(
+          "joinType" -> joinType,
+          "supported" -> NearestByJoinValidation.SupportedJoinTypeDisplay))
+    }
+    if (!NearestByJoinValidation.SupportedModes.contains(
+        mode.toLowerCase(java.util.Locale.ROOT))) {
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_MODE",
+        messageParameters = Map(
+          "mode" -> mode,
+          "supported" ->
+            NearestByJoinValidation.SupportedModes.mkString("'", "', '", "'")))
+    }
+    if (!NearestByJoinValidation.SupportedDirections.contains(
+        direction.toLowerCase(java.util.Locale.ROOT))) {
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.UNSUPPORTED_DIRECTION",
+        messageParameters = Map(
+          "direction" -> direction,
+          "supported" ->
+            NearestByJoinValidation.SupportedDirections.mkString("'", "', '", "'")))
+    }
+  }
+}
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_valid_variant.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_valid_variant.explain
new file mode 100644
index 0000000000000..93c3d3c0547e0
--- /dev/null
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_valid_variant.explain
@@ -0,0 +1,2 @@
+Project [static_invoke(VariantExpressionEvalUtils.isValidVariant(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)))) AS is_valid_variant(parse_json(g))#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain
index 988447c2d6418..62eeba6355107 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain
@@ -1,2 +1,2 @@
-Project [static_invoke(VariantExpressionEvalUtils.isVariantNull(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true)))) AS is_variant_null(parse_json(g))#0]
+Project [static_invoke(VariantExpressionEvalUtils.isVariantNull(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)))) AS is_variant_null(parse_json(g))#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain
index a40f89c03c888..ddaea2f318b10 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain
@@ -1,2 +1,2 @@
-Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true)) AS parse_json(g)#0]
+Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)) AS parse_json(g)#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain
index c82a10655c332..70831c24afe83 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain
@@ -1,2 +1,2 @@
-Project [static_invoke(SchemaOfVariant.schemaOfVariant(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true)))) AS schema_of_variant(parse_json(g))#0]
+Project [static_invoke(SchemaOfVariant.schemaOfVariant(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)))) AS schema_of_variant(parse_json(g))#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain
index 3d894628ab7e0..de06881d96d42 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain
@@ -1,2 +1,2 @@
-Aggregate [schema_of_variant_agg(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true)), 0, 0) AS schema_of_variant_agg(parse_json(g))#0]
+Aggregate [schema_of_variant_agg(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)), 0, 0) AS schema_of_variant_agg(parse_json(g))#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain
index 7a0c0078128f5..efbfc467ba4b1 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain
@@ -1,2 +1,2 @@
-Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, false)) AS try_parse_json(g)#0]
+Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, false, true)) AS try_parse_json(g)#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain
index 65f527da78c4a..68eace9d83224 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain
@@ -1,2 +1,2 @@
-Project [try_variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true)), $, IntegerType, false, Some(America/Los_Angeles)) AS try_variant_get(parse_json(g), $)#0]
+Project [try_variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)), $, IntegerType, false, Some(America/Los_Angeles)) AS try_variant_get(parse_json(g), $)#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain
index 33c6b3a52f529..c13d457744923 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain
@@ -1,2 +1,2 @@
-Project [variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true)), $, IntegerType, true, Some(America/Los_Angeles)) AS variant_get(parse_json(g), $)#0]
+Project [variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false, true, true)), $, IntegerType, true, Some(America/Los_Angeles)) AS variant_get(parse_json(g), $)#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/nearestByJoin_inner_approx_similarity.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/nearestByJoin_inner_approx_similarity.explain
new file mode 100644
index 0000000000000..8e3750b4c4a76
--- /dev/null
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/nearestByJoin_inner_approx_similarity.explain
@@ -0,0 +1,5 @@
+'NearestByJoin Inner, true, 1, (a#0 + a#0), NearestBySimilarity
+:- SubqueryAlias l
+:  +- LocalRelation <empty>, [id#0L, a#0, b#0]
++- SubqueryAlias r
+   +- LocalRelation <empty>, [a#0, id#0L, payload#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/nearestByJoin_leftouter_exact_distance.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/nearestByJoin_leftouter_exact_distance.explain
new file mode 100644
index 0000000000000..67539c3964b1d
--- /dev/null
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/nearestByJoin_leftouter_exact_distance.explain
@@ -0,0 +1,5 @@
+'NearestByJoin LeftOuter, false, 5, (a#0 + a#0), NearestByDistance
+:- SubqueryAlias l
+:  +- LocalRelation <empty>, [id#0L, a#0, b#0]
++- SubqueryAlias r
+   +- LocalRelation <empty>, [a#0, id#0L, payload#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes.explain
index 413eaf8f7a686..9de2e887e1ee6 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes.explain
@@ -1,2 +1,2 @@
-SubqueryAlias primary.tempdb.myTable
-+- RelationV2[id#0L, _change_type#0, _commit_version#0L, _commit_timestamp#0] primary.tempdb.myTable
+SubqueryAlias spark_catalog.tempdb.myTable
++- RelationV2[id#0L, _change_type#0, _commit_version#0L, _commit_timestamp#0] spark_catalog.tempdb.myTable
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes_with_options.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes_with_options.explain
index 413eaf8f7a686..9de2e887e1ee6 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes_with_options.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/read_changes_with_options.explain
@@ -1,2 +1,2 @@
-SubqueryAlias primary.tempdb.myTable
-+- RelationV2[id#0L, _change_type#0, _commit_version#0L, _commit_timestamp#0] primary.tempdb.myTable
+SubqueryAlias spark_catalog.tempdb.myTable
++- RelationV2[id#0L, _change_type#0, _commit_version#0L, _commit_timestamp#0] spark_catalog.tempdb.myTable
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/read_table.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/read_table.explain
index 979084f06a87a..e5dce0fe05742 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/read_table.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/read_table.explain
@@ -1,2 +1,2 @@
-SubqueryAlias primary.tempdb.myTable
-+- RelationV2[id#0L] primary.tempdb.myTable
+SubqueryAlias spark_catalog.tempdb.myTable
++- RelationV2[id#0L] spark_catalog.tempdb.myTable
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/sample_fraction_seed.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/sample_fraction_seed.explain
index f94e0a850e403..9bcbf88135399 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/sample_fraction_seed.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/sample_fraction_seed.explain
@@ -1,2 +1,2 @@
-Sample 0.0, 0.43, false, 9890823
+Sample 0.0, 0.43, false, 9890823, Bernoulli
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/sample_withReplacement_fraction_seed.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/sample_withReplacement_fraction_seed.explain
index 340c25ab6d017..5af5314e48f90 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/sample_withReplacement_fraction_seed.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/sample_withReplacement_fraction_seed.explain
@@ -1,2 +1,2 @@
-Sample 0.0, 0.23, true, 898
+Sample 0.0, 0.23, true, 898, Bernoulli
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_changes_API_with_options.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_changes_API_with_options.explain
index 6f12567607ac0..6a1afa73f7cda 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_changes_API_with_options.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_changes_API_with_options.explain
@@ -1,2 +1,2 @@
-~SubqueryAlias primary.tempdb.myStreamingTable
-+- ~StreamingRelationV2 primary.tempdb.myStreamingTable_changelog, ChangelogTable(org.apache.spark.sql.connector.catalog.InMemoryChangelog,ChangelogInfo{range=VersionRange[startingVersion=1, endingVersion=Optional.empty, startingBoundInclusive=true, endingBoundInclusive=true], deduplicationMode=DROP_CARRYOVERS, computeUpdates=false}), [startingVersion=1, deduplicationMode=dropCarryovers], [id#0L, _change_type#0, _commit_version#0L, _commit_timestamp#0], org.apache.spark.sql.connector.catalog.InMemoryChangelogCatalog, tempdb.myStreamingTable, name=<Unassigned>
+~SubqueryAlias spark_catalog.tempdb.myStreamingTable
++- ~StreamingRelationV2 spark_catalog.tempdb.myStreamingTable_changelog, ChangelogTable(org.apache.spark.sql.connector.catalog.InMemoryChangelog,ChangelogContext{range=VersionRange[startingVersion=1, endingVersion=Optional.empty, startingBoundInclusive=true, endingBoundInclusive=true], deduplicationMode=DROP_CARRYOVERS, computeUpdates=false},true), [startingVersion=1, deduplicationMode=dropCarryovers], [id#0L, _change_type#0, _commit_version#0L, _commit_timestamp#0], org.apache.spark.sql.connector.catalog.InMemoryChangelogCatalog, tempdb.myStreamingTable, name=<Unassigned>
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain
index 9ea4ad218a5f4..dc17d3503894d 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain
@@ -1,2 +1,2 @@
-~SubqueryAlias primary.tempdb.myStreamingTable
-+- ~StreamingRelationV2 primary.tempdb.myStreamingTable, org.apache.spark.sql.connector.catalog.InMemoryTable, [p1=v1, p2=v2], [id#0L], org.apache.spark.sql.connector.catalog.InMemoryChangelogCatalog, tempdb.myStreamingTable, name=<Unassigned>
+~SubqueryAlias spark_catalog.tempdb.myStreamingTable
++- ~StreamingRelationV2 spark_catalog.tempdb.myStreamingTable, org.apache.spark.sql.connector.catalog.InMemoryTable, [p1=v1, p2=v2], [id#0L], org.apache.spark.sql.connector.catalog.InMemoryChangelogCatalog, tempdb.myStreamingTable, name=<Unassigned>
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/table.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/table.explain
index 979084f06a87a..e5dce0fe05742 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/table.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/table.explain
@@ -1,2 +1,2 @@
-SubqueryAlias primary.tempdb.myTable
-+- RelationV2[id#0L] primary.tempdb.myTable
+SubqueryAlias spark_catalog.tempdb.myTable
++- RelationV2[id#0L] spark_catalog.tempdb.myTable
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain
index 979084f06a87a..e5dce0fe05742 100644
--- a/sql/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain
+++ b/sql/connect/common/src/test/resources/query-tests/explain-results/table_API_with_options.explain
@@ -1,2 +1,2 @@
-SubqueryAlias primary.tempdb.myTable
-+- RelationV2[id#0L] primary.tempdb.myTable
+SubqueryAlias spark_catalog.tempdb.myTable
++- RelationV2[id#0L] spark_catalog.tempdb.myTable
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_is_valid_variant.json b/sql/connect/common/src/test/resources/query-tests/queries/function_is_valid_variant.json
new file mode 100644
index 0000000000000..cdf4cca04fda6
--- /dev/null
+++ b/sql/connect/common/src/test/resources/query-tests/queries/function_is_valid_variant.json
@@ -0,0 +1,83 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "is_valid_variant",
+        "arguments": [{
+          "unresolvedFunction": {
+            "functionName": "parse_json",
+            "arguments": [{
+              "unresolvedAttribute": {
+                "unparsedIdentifier": "g"
+              },
+              "common": {
+                "origin": {
+                  "jvmOrigin": {
+                    "stackTrace": [{
+                      "classLoaderName": "app",
+                      "declaringClass": "org.apache.spark.sql.functions$",
+                      "methodName": "col",
+                      "fileName": "functions.scala"
+                    }, {
+                      "classLoaderName": "app",
+                      "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+                      "methodName": "~~trimmed~anonfun~~",
+                      "fileName": "PlanGenerationTestSuite.scala"
+                    }]
+                  }
+                }
+              }
+            }],
+            "isInternal": false
+          },
+          "common": {
+            "origin": {
+              "jvmOrigin": {
+                "stackTrace": [{
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.functions$",
+                  "methodName": "parse_json",
+                  "fileName": "functions.scala"
+                }, {
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+                  "methodName": "~~trimmed~anonfun~~",
+                  "fileName": "PlanGenerationTestSuite.scala"
+                }]
+              }
+            }
+          }
+        }],
+        "isInternal": false
+      },
+      "common": {
+        "origin": {
+          "jvmOrigin": {
+            "stackTrace": [{
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.functions$",
+              "methodName": "is_valid_variant",
+              "fileName": "functions.scala"
+            }, {
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+              "methodName": "~~trimmed~anonfun~~",
+              "fileName": "PlanGenerationTestSuite.scala"
+            }]
+          }
+        }
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_is_valid_variant.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_is_valid_variant.proto.bin
new file mode 100644
index 0000000000000..b327ddbfc0874
Binary files /dev/null and b/sql/connect/common/src/test/resources/query-tests/queries/function_is_valid_variant.proto.bin differ
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_inner_approx_similarity.json b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_inner_approx_similarity.json
new file mode 100644
index 0000000000000..ca4f2919e55c6
--- /dev/null
+++ b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_inner_approx_similarity.json
@@ -0,0 +1,109 @@
+{
+  "common": {
+    "planId": "4"
+  },
+  "nearestByJoin": {
+    "left": {
+      "common": {
+        "planId": "1"
+      },
+      "subqueryAlias": {
+        "input": {
+          "common": {
+            "planId": "0"
+          },
+          "localRelation": {
+            "schema": "struct\u003cid:bigint,a:int,b:double\u003e"
+          }
+        },
+        "alias": "l"
+      }
+    },
+    "right": {
+      "common": {
+        "planId": "3"
+      },
+      "subqueryAlias": {
+        "input": {
+          "common": {
+            "planId": "2"
+          },
+          "localRelation": {
+            "schema": "struct\u003ca:int,id:bigint,payload:binary\u003e"
+          }
+        },
+        "alias": "r"
+      }
+    },
+    "rankingExpression": {
+      "unresolvedFunction": {
+        "functionName": "+",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "l.a"
+          },
+          "common": {
+            "origin": {
+              "jvmOrigin": {
+                "stackTrace": [{
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.functions$",
+                  "methodName": "col",
+                  "fileName": "functions.scala"
+                }, {
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+                  "methodName": "~~trimmed~anonfun~~",
+                  "fileName": "PlanGenerationTestSuite.scala"
+                }]
+              }
+            }
+          }
+        }, {
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "r.a"
+          },
+          "common": {
+            "origin": {
+              "jvmOrigin": {
+                "stackTrace": [{
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.functions$",
+                  "methodName": "col",
+                  "fileName": "functions.scala"
+                }, {
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+                  "methodName": "~~trimmed~anonfun~~",
+                  "fileName": "PlanGenerationTestSuite.scala"
+                }]
+              }
+            }
+          }
+        }],
+        "isInternal": false
+      },
+      "common": {
+        "origin": {
+          "jvmOrigin": {
+            "stackTrace": [{
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.Column",
+              "methodName": "$plus",
+              "fileName": "Column.scala"
+            }, {
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+              "methodName": "~~trimmed~anonfun~~",
+              "fileName": "PlanGenerationTestSuite.scala"
+            }]
+          }
+        }
+      }
+    },
+    "numResults": 1,
+    "joinType": "inner",
+    "mode": "approx",
+    "direction": "similarity"
+  }
+}
\ No newline at end of file
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_inner_approx_similarity.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_inner_approx_similarity.proto.bin
new file mode 100644
index 0000000000000..8dbeb994d8fcd
Binary files /dev/null and b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_inner_approx_similarity.proto.bin differ
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_leftouter_exact_distance.json b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_leftouter_exact_distance.json
new file mode 100644
index 0000000000000..877bff8f90c8e
--- /dev/null
+++ b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_leftouter_exact_distance.json
@@ -0,0 +1,109 @@
+{
+  "common": {
+    "planId": "4"
+  },
+  "nearestByJoin": {
+    "left": {
+      "common": {
+        "planId": "1"
+      },
+      "subqueryAlias": {
+        "input": {
+          "common": {
+            "planId": "0"
+          },
+          "localRelation": {
+            "schema": "struct\u003cid:bigint,a:int,b:double\u003e"
+          }
+        },
+        "alias": "l"
+      }
+    },
+    "right": {
+      "common": {
+        "planId": "3"
+      },
+      "subqueryAlias": {
+        "input": {
+          "common": {
+            "planId": "2"
+          },
+          "localRelation": {
+            "schema": "struct\u003ca:int,id:bigint,payload:binary\u003e"
+          }
+        },
+        "alias": "r"
+      }
+    },
+    "rankingExpression": {
+      "unresolvedFunction": {
+        "functionName": "+",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "l.a"
+          },
+          "common": {
+            "origin": {
+              "jvmOrigin": {
+                "stackTrace": [{
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.functions$",
+                  "methodName": "col",
+                  "fileName": "functions.scala"
+                }, {
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+                  "methodName": "~~trimmed~anonfun~~",
+                  "fileName": "PlanGenerationTestSuite.scala"
+                }]
+              }
+            }
+          }
+        }, {
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "r.a"
+          },
+          "common": {
+            "origin": {
+              "jvmOrigin": {
+                "stackTrace": [{
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.functions$",
+                  "methodName": "col",
+                  "fileName": "functions.scala"
+                }, {
+                  "classLoaderName": "app",
+                  "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+                  "methodName": "~~trimmed~anonfun~~",
+                  "fileName": "PlanGenerationTestSuite.scala"
+                }]
+              }
+            }
+          }
+        }],
+        "isInternal": false
+      },
+      "common": {
+        "origin": {
+          "jvmOrigin": {
+            "stackTrace": [{
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.Column",
+              "methodName": "$plus",
+              "fileName": "Column.scala"
+            }, {
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+              "methodName": "~~trimmed~anonfun~~",
+              "fileName": "PlanGenerationTestSuite.scala"
+            }]
+          }
+        }
+      }
+    },
+    "numResults": 5,
+    "joinType": "leftouter",
+    "mode": "exact",
+    "direction": "distance"
+  }
+}
\ No newline at end of file
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_leftouter_exact_distance.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_leftouter_exact_distance.proto.bin
new file mode 100644
index 0000000000000..a671071c556ed
Binary files /dev/null and b/sql/connect/common/src/test/resources/query-tests/queries/nearestByJoin_leftouter_exact_distance.proto.bin differ
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.json b/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.json
index ddc20aada18b8..f24d67a6b1121 100644
--- a/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.json
+++ b/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.json
@@ -6,7 +6,7 @@
     "unparsedIdentifier": "myTable",
     "options": {
       "startingTimestamp": "2026-01-01",
-      "deduplicationMode": "netChanges",
+      "deduplicationMode": "dropCarryovers",
       "computeUpdates": "true"
     }
   }
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.proto.bin
index 00ab977b46596..4d5c973813268 100644
Binary files a/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/read_changes_with_options.proto.bin differ
diff --git a/sql/connect/server/pom.xml b/sql/connect/server/pom.xml
index 6cb88edf24686..f70eb91b0291c 100644
--- a/sql/connect/server/pom.xml
+++ b/sql/connect/server/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala
index dd47997650402..f8edbc9928000 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.connect.pipelines
 
-import scala.collection.Seq
 import scala.jdk.CollectionConverters._
 import scala.util.Using
 
@@ -25,16 +24,21 @@ import io.grpc.stub.StreamObserver
 
 import org.apache.spark.connect.proto
 import org.apache.spark.connect.proto.{ExecutePlanResponse, PipelineCommandResult, Relation, ResolvedIdentifier}
+import org.apache.spark.connect.proto.PipelineCommand.DefineFlow.AutoCdcFlowDetails
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.{AnalysisException, Column}
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.{Command, CreateNamespace, CreateTable, CreateTableAsSelect, CreateView, DescribeRelation, DescribeTablePartition, DropView, InsertIntoStatement, LogicalPlan, RenameTable, ShowColumns, ShowCreateTable, ShowFunctions, ShowTableProperties, ShowTables, ShowViews}
+import org.apache.spark.sql.classic.ClassicConversions._
 import org.apache.spark.sql.connect.common.DataTypeProtoConverter
 import org.apache.spark.sql.connect.service.SessionHolder
 import org.apache.spark.sql.execution.command.{ShowCatalogsCommand, ShowNamespacesCommand}
 import org.apache.spark.sql.pipelines.Language.Python
+import org.apache.spark.sql.pipelines.autocdc.{ChangeArgs, ColumnSelection, ScdType, UnqualifiedColumnName}
 import org.apache.spark.sql.pipelines.common.RunState.{CANCELED, FAILED}
-import org.apache.spark.sql.pipelines.graph.{AllTables, FlowAnalysis, GraphIdentifierManager, GraphRegistrationContext, IdentifierHelper, NoTables, PipelineUpdateContextImpl, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, SomeTables, SqlGraphRegistrationContext, Table, TableFilter, TemporaryView, UnresolvedFlow}
+import org.apache.spark.sql.pipelines.graph.{AllTables, AutoCdcFlow, FlowAnalysis, GraphIdentifierManager, GraphRegistrationContext, IdentifierHelper, NoTables, PipelineUpdateContextImpl, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, SomeTables, SqlGraphRegistrationContext, Table, TableFilter, TemporaryView, UntypedFlow}
 import org.apache.spark.sql.pipelines.logging.{PipelineEvent, RunProgress}
 import org.apache.spark.sql.types.StructType
 
@@ -52,6 +56,8 @@ private[connect] object PipelinesHandler extends Logging {
    * @param transformRelationFunc
    *   Function used to convert a relation to a LogicalPlan. This is used when determining the
    *   LogicalPlan that a flow returns.
+   * @param transformExpressionFunc
+   *   Function used to convert a proto expression to a Catalyst expression.
    * @return
    *   The response after handling the command
    */
@@ -59,7 +65,8 @@ private[connect] object PipelinesHandler extends Logging {
       sessionHolder: SessionHolder,
       cmd: proto.PipelineCommand,
       responseObserver: StreamObserver[ExecutePlanResponse],
-      transformRelationFunc: Relation => LogicalPlan): PipelineCommandResult = {
+      transformRelationFunc: Relation => LogicalPlan,
+      transformExpressionFunc: proto.Expression => Expression): PipelineCommandResult = {
     // Currently most commands do not include any information in the response. We just send back
     // an empty response to the client to indicate that the command was handled successfully
     val defaultResponse = PipelineCommandResult.getDefaultInstance
@@ -99,7 +106,11 @@ private[connect] object PipelinesHandler extends Logging {
       case proto.PipelineCommand.CommandTypeCase.DEFINE_FLOW =>
         logInfo(s"Define pipelines flow cmd received: $cmd")
         val resolvedFlow =
-          defineFlow(cmd.getDefineFlow, transformRelationFunc, sessionHolder)
+          defineFlow(
+            cmd.getDefineFlow,
+            transformRelationFunc,
+            transformExpressionFunc,
+            sessionHolder)
         val identifierBuilder = ResolvedIdentifier.newBuilder()
         resolvedFlow.catalog.foreach(identifierBuilder.setCatalogName)
         resolvedFlow.database.foreach { ns =>
@@ -315,6 +326,7 @@ private[connect] object PipelinesHandler extends Logging {
   private def defineFlow(
       flow: proto.PipelineCommand.DefineFlow,
       transformRelationFunc: Relation => LogicalPlan,
+      transformExpressionFunc: proto.Expression => Expression,
       sessionHolder: SessionHolder): TableIdentifier = {
     if (flow.hasOnce) {
       throw new AnalysisException(
@@ -367,27 +379,137 @@ private[connect] object PipelinesHandler extends Logging {
         }
       }
 
-    val relationFlowDetails = flow.getRelationFlowDetails
-    graphElementRegistry.registerFlow(
-      UnresolvedFlow(
-        identifier = flowIdentifier,
-        destinationIdentifier = destinationIdentifier,
-        func = FlowAnalysis.createFlowFunctionFromLogicalPlan(
-          transformRelationFunc(relationFlowDetails.getRelation)),
-        sqlConf = flow.getSqlConfMap.asScala.toMap,
-        once = false,
-        queryContext = QueryContext(Option(defaultCatalog), Option(defaultDatabase)),
-        origin = QueryOrigin(
-          filePath = Option.when(flow.getSourceCodeLocation.hasFileName)(
-            flow.getSourceCodeLocation.getFileName),
-          line = Option.when(flow.getSourceCodeLocation.hasLineNumber)(
-            flow.getSourceCodeLocation.getLineNumber),
-          objectType = Some(QueryOriginType.Flow.toString),
-          objectName = Option(flowIdentifier.unquotedString),
-          language = Some(Python()))))
+    flow.getDetailsCase match {
+      case proto.PipelineCommand.DefineFlow.DetailsCase.RELATION_FLOW_DETAILS =>
+        val relationFlowDetails = flow.getRelationFlowDetails
+        graphElementRegistry.registerFlow(
+          UntypedFlow(
+            identifier = flowIdentifier,
+            destinationIdentifier = destinationIdentifier,
+            func = FlowAnalysis.createFlowFunctionFromLogicalPlan(
+              transformRelationFunc(relationFlowDetails.getRelation)),
+            sqlConf = flow.getSqlConfMap.asScala.toMap,
+            once = false,
+            queryContext = QueryContext(Option(defaultCatalog), Option(defaultDatabase)),
+            origin = flowOrigin(flow, flowIdentifier)))
+      case proto.PipelineCommand.DefineFlow.DetailsCase.AUTO_CDC_FLOW_DETAILS =>
+        graphElementRegistry.registerFlow(
+          buildAutoCdcFlow(
+            autoCdcDetails = flow.getAutoCdcFlowDetails,
+            flow = flow,
+            flowIdentifier = flowIdentifier,
+            destinationIdentifier = destinationIdentifier,
+            defaultCatalog = defaultCatalog,
+            defaultDatabase = defaultDatabase,
+            sessionHolder = sessionHolder,
+            transformExpressionFunc = transformExpressionFunc))
+      case other =>
+        throw new UnsupportedOperationException(s"Unsupported DefineFlow details case: $other")
+    }
     flowIdentifier
   }
 
+  /**
+   * Build an [[AutoCdcFlow]] from the proto-supplied AutoCDC flow details.
+   *
+   * The flow's source expression is encoded by the Python client as a streaming-table name; we
+   * model that on the server side as a streaming [[UnresolvedRelation]] so that pipelines flow
+   * analysis (which already handles `STREAM(t)` references) can resolve it against the rest of
+   * the dataflow graph.
+   */
+  private def buildAutoCdcFlow(
+      autoCdcDetails: AutoCdcFlowDetails,
+      flow: proto.PipelineCommand.DefineFlow,
+      flowIdentifier: TableIdentifier,
+      destinationIdentifier: TableIdentifier,
+      defaultCatalog: String,
+      defaultDatabase: String,
+      sessionHolder: SessionHolder,
+      transformExpressionFunc: proto.Expression => Expression): AutoCdcFlow = {
+    // TODO(SPARK-57092): apply_as_truncates is declared on AutoCdcFlowDetails but is not yet
+    //   honored by the engine; wire it through once SCD1 truncate support lands.
+    // TODO(SPARK-57093): ignore_null_updates_column_list and ignore_null_updates_except_column_list
+    //   are declared on AutoCdcFlowDetails but are not yet honored by the engine; wire them
+    //   through once SCD1 ignore-null support lands.
+
+    if (!autoCdcDetails.hasSource) {
+      throw new AnalysisException("AUTOCDC_MISSING_SOURCE", Map.empty)
+    }
+    if (!autoCdcDetails.hasSequenceBy) {
+      throw new AnalysisException("AUTOCDC_MISSING_SEQUENCE_BY", Map.empty)
+    }
+
+    val sourcePlan: LogicalPlan = UnresolvedRelation(
+      multipartIdentifier = GraphIdentifierManager
+        .parseTableIdentifier(name = autoCdcDetails.getSource, spark = sessionHolder.session)
+        .nameParts,
+      isStreaming = true)
+
+    val toColumn: proto.Expression => Column = expr => Column(transformExpressionFunc(expr))
+
+    val asUnqualifiedColumnName: proto.Expression => UnqualifiedColumnName = expr =>
+      transformExpressionFunc(expr) match {
+        case a: UnresolvedAttribute => UnqualifiedColumnName(a.nameParts)
+        case other =>
+          throw new AnalysisException(
+            "AUTOCDC_NON_COLUMN_IDENTIFIER",
+            Map("expression" -> other.sql))
+      }
+
+    val keys = autoCdcDetails.getKeysList.asScala.toSeq.map(asUnqualifiedColumnName)
+
+    val columnSelection: Option[ColumnSelection] = {
+      val included = autoCdcDetails.getColumnListList.asScala.toSeq
+      val excluded = autoCdcDetails.getExceptColumnListList.asScala.toSeq
+      if (included.nonEmpty && excluded.nonEmpty) {
+        throw new AnalysisException("AUTOCDC_BOTH_COLUMN_LIST_AND_EXCEPT_COLUMN_LIST", Map.empty)
+      } else if (included.nonEmpty) {
+        Some(ColumnSelection.IncludeColumns(included.map(asUnqualifiedColumnName)))
+      } else if (excluded.nonEmpty) {
+        Some(ColumnSelection.ExcludeColumns(excluded.map(asUnqualifiedColumnName)))
+      } else {
+        None
+      }
+    }
+
+    // Get user specified SCD type, or default to SCD1 if unspecified.
+    val scdType: ScdType = autoCdcDetails.getStoredAsScdType match {
+      case proto.PipelineCommand.DefineFlow.SCDType.SCD_TYPE_1 |
+          proto.PipelineCommand.DefineFlow.SCDType.SCD_TYPE_UNSPECIFIED =>
+        ScdType.Type1
+      case other =>
+        throw new UnsupportedOperationException(s"Unsupported AutoCDC SCD type: $other")
+    }
+
+    val changeArgs = ChangeArgs(
+      keys = keys,
+      sequencing = toColumn(autoCdcDetails.getSequenceBy),
+      storedAsScdType = scdType,
+      deleteCondition =
+        Option.when(autoCdcDetails.hasApplyAsDeletes)(toColumn(autoCdcDetails.getApplyAsDeletes)),
+      columnSelection = columnSelection)
+
+    AutoCdcFlow(
+      identifier = flowIdentifier,
+      destinationIdentifier = destinationIdentifier,
+      func = FlowAnalysis.createFlowFunctionFromLogicalPlan(sourcePlan),
+      sqlConf = flow.getSqlConfMap.asScala.toMap,
+      queryContext = QueryContext(Option(defaultCatalog), Option(defaultDatabase)),
+      origin = flowOrigin(flow, flowIdentifier),
+      changeArgs = changeArgs)
+  }
+
+  private def flowOrigin(
+      flow: proto.PipelineCommand.DefineFlow,
+      flowIdentifier: TableIdentifier): QueryOrigin = QueryOrigin(
+    filePath =
+      Option.when(flow.getSourceCodeLocation.hasFileName)(flow.getSourceCodeLocation.getFileName),
+    line = Option.when(flow.getSourceCodeLocation.hasLineNumber)(
+      flow.getSourceCodeLocation.getLineNumber),
+    objectType = Some(QueryOriginType.Flow.toString),
+    objectName = Option(flowIdentifier.unquotedString),
+    language = Some(Python()))
+
   private def startRun(
       cmd: proto.PipelineCommand.StartRun,
       responseObserver: StreamObserver[ExecutePlanResponse],
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index e9282cd4cb1b7..c84eaadaa4537 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -43,7 +43,7 @@ import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID}
 import org.apache.spark.resource.{ExecutorResourceRequest, ResourceProfile, TaskResourceProfile, TaskResourceRequest}
 import org.apache.spark.sql.{AnalysisException, Column, Encoders, ForeachWriter, Row}
 import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, InternalRow, QueryPlanningTracker}
-import org.apache.spark.sql.catalyst.analysis.{ChangelogInfoUtils, FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, RelationChanges, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedOrdinal, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose}
+import org.apache.spark.sql.catalyst.analysis.{ChangelogContextUtils, FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, RelationChanges, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedOrdinal, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose}
 import org.apache.spark.sql.catalyst.encoders.{encoderFor, AgnosticEncoder, ExpressionEncoder, RowEncoder}
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ProductEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, UnboundRowEncoder}
 import org.apache.spark.sql.catalyst.expressions._
@@ -159,6 +159,8 @@ class SparkConnectPlanner(
         case proto.Relation.RelTypeCase.JOIN => transformJoinOrJoinWith(rel.getJoin)
         case proto.Relation.RelTypeCase.AS_OF_JOIN => transformAsOfJoin(rel.getAsOfJoin)
         case proto.Relation.RelTypeCase.LATERAL_JOIN => transformLateralJoin(rel.getLateralJoin)
+        case proto.Relation.RelTypeCase.NEAREST_BY_JOIN =>
+          transformNearestByJoin(rel.getNearestByJoin)
         case proto.Relation.RelTypeCase.DEDUPLICATE => transformDeduplicate(rel.getDeduplicate)
         case proto.Relation.RelTypeCase.SET_OP => transformSetOperation(rel.getSetOp)
         case proto.Relation.RelTypeCase.SORT => transformSort(rel.getSort)
@@ -1739,10 +1741,10 @@ class SparkConnectPlanner(
     val tableName = rel.getUnparsedIdentifier
     val options = new CaseInsensitiveStringMap(rel.getOptionsMap)
     val timeZone = session.sessionState.conf.sessionLocalTimeZone
-    val changelogInfo = ChangelogInfoUtils.fromOptions(options, timeZone)
+    val ctx = ChangelogContextUtils.fromOptions(options, timeZone)
     val ident = parser.parseMultipartIdentifier(tableName)
     val relation = UnresolvedRelation(ident, options, isStreaming = rel.getIsStreaming)
-    RelationChanges(relation, changelogInfo)
+    RelationChanges(relation, ctx)
   }
 
   private def transformParse(rel: proto.Parse): LogicalPlan = {
@@ -2567,6 +2569,28 @@ class SparkConnectPlanner(
       condition = joinCondition)
   }
 
+  private def transformNearestByJoin(rel: proto.NearestByJoin): LogicalPlan = {
+    assertPlan(rel.hasLeft && rel.hasRight, "Both join sides must be present")
+    assertPlan(rel.hasRankingExpression, "Ranking expression must be present")
+    // proto3 string fields default to "" when not set; reject the empty case explicitly so the
+    // user sees a "must be set" error instead of a misleading "unsupported value" error.
+    assertPlan(rel.getJoinType.nonEmpty, "NearestByJoin.join_type must be set")
+    assertPlan(rel.getMode.nonEmpty, "NearestByJoin.mode must be set")
+    assertPlan(rel.getDirection.nonEmpty, "NearestByJoin.direction must be set")
+    val left = Dataset.ofRows(session, transformRelation(rel.getLeft))
+    val right = Dataset.ofRows(session, transformRelation(rel.getRight))
+    val rankingExpression = Column(transformExpression(rel.getRankingExpression))
+    left
+      .nearestByJoin(
+        right,
+        rankingExpression,
+        rel.getNumResults,
+        rel.getMode,
+        rel.getDirection,
+        rel.getJoinType)
+      .logicalPlan
+  }
+
   private def transformSort(sort: proto.Sort): LogicalPlan = {
     assertPlan(sort.getOrderCount > 0, "'order' must be present and contain elements.")
     logical.Sort(
@@ -2931,7 +2955,8 @@ class SparkConnectPlanner(
       sessionHolder,
       command,
       responseObserver,
-      transformRelation)
+      transformRelation,
+      transformExpression)
     executeHolder.eventsManager.postFinished()
     responseObserver.onNext(
       proto.ExecutePlanResponse
@@ -3353,6 +3378,10 @@ class SparkConnectPlanner(
       w.format(writeOperation.getSource)
     }
 
+    if (writeOperation.getWithSchemaEvolution) {
+      w.withSchemaEvolution()
+    }
+
     writeOperation.getSaveTypeCase match {
       case proto.WriteOperation.SaveTypeCase.SAVETYPE_NOT_SET => w.saveCommand(None)
       case proto.WriteOperation.SaveTypeCase.PATH =>
@@ -3419,6 +3448,10 @@ class SparkConnectPlanner(
       w.clusterBy(names.head, names.tail.toSeq: _*)
     }
 
+    if (writeOperation.getWithSchemaEvolution) {
+      w.withSchemaEvolution()
+    }
+
     writeOperation.getMode match {
       case proto.WriteOperationV2.Mode.MODE_CREATE =>
         if (writeOperation.hasProvider) {
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala
index 97da96894e2a9..56973304f41e7 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala
@@ -215,21 +215,49 @@ object StreamingForeachBatchHelper extends Logging {
     // Mapping from streaming (queryId, runId) to runner cleaner. Used for Python foreachBatch.
     private val cleanerCache: ConcurrentMap[CacheKey, AutoCloseable] = new ConcurrentHashMap()
 
-    private lazy val streamingListener = { // Initialized on first registered query
-      val listener = new StreamingRunnerCleanerListener
-      sessionHolder.session.streams.addListener(listener)
-      logInfo(
-        log"[session: ${MDC(SESSION_ID, sessionHolder.sessionId)}] " +
-          log"[userId: ${MDC(USER_ID, sessionHolder.userId)}] " +
-          log"Registered runner clean up listener.")
-      listener
+    // The runner clean-up listener, registered on first use and removed on cleanup. Both
+    // operations are guarded by `this`, and the field is recoverable: after removal a later
+    // registration re-adds a fresh listener, so cleanUpAll() does not permanently disable the cache
+    // if it is reused. (Today cleanUpAll() is only called on the close path, after which
+    // registration fast-paths on isClosing -- but correctness no longer depends on that.)
+    private var streamingListener: StreamingRunnerCleanerListener = _
+
+    private def ensureListenerRegistered(): Unit = synchronized {
+      if (streamingListener == null) {
+        val listener = new StreamingRunnerCleanerListener
+        sessionHolder.session.streams.addListener(listener)
+        streamingListener = listener
+        logInfo(
+          log"[session: ${MDC(SESSION_ID, sessionHolder.sessionId)}] " +
+            log"[userId: ${MDC(USER_ID, sessionHolder.userId)}] " +
+            log"Registered runner clean up listener.")
+      }
+    }
+
+    // Removes the listener from session.streams if it is currently registered.
+    // SessionHolder.close() does not remove this listener (it is not tracked in the session's
+    // listenerCache), so the cache must drop it on cleanup; otherwise the listener keeps this
+    // CleanerCache / SessionHolder reachable after the session is closed.
+    private def removeListenerIfRegistered(): Unit = synchronized {
+      if (streamingListener != null) {
+        sessionHolder.session.streams.removeListener(streamingListener)
+        streamingListener = null
+      }
     }
 
     private[connect] def registerCleanerForQuery(
         query: StreamingQuery,
         cleaner: AutoCloseable): Unit = {
 
-      streamingListener // Access to initialize
+      // Fast path: if the session is already closing, do not even register the listener (it is
+      // added to session.streams and is not removed by SessionHolder.close(), so it would leak and
+      // keep the closed session reachable). Just close the runner and return.
+      if (sessionHolder.isClosing) {
+        cleaner.close()
+        return
+      }
+
+      ensureListenerRegistered() // Register the runner clean-up listener if not already.
       val key = CacheKey(query.id.toString, query.runId.toString)
 
       Option(cleanerCache.putIfAbsent(key, cleaner)) match {
@@ -237,12 +265,27 @@ object StreamingForeachBatchHelper extends Logging {
           throw IllegalStateErrors.cleanerAlreadySet(sessionHolder.key.toString, key.toString)
         case None => // Inserted. Normal.
       }
+
+      // Guard against the same shutdown race that SparkConnectStreamingQueryCache handles for
+      // queries: SessionHolder.close() reaps runners via cleanUpAll(), and a cleaner registered
+      // after that pass (for a query started concurrently with close()) would be missed by it. The
+      // onQueryTerminated listener is the other reaper, but it too can miss a cleaner whose query
+      // already terminated before this registration. So after inserting we re-check isClosing; if
+      // the session started closing in the meantime we clean the runner up here to avoid stranding
+      // a Python worker. We also drop the listener: it was just added above (possibly after close()
+      // ran cleanUpAll()), and close() does not remove it, so it would otherwise leak.
+      if (sessionHolder.isClosing) {
+        cleanupStreamingRunner(key)
+        removeListenerIfRegistered()
+      }
     }
 
     /** Cleans up all the registered runners. */
     private[connect] def cleanUpAll(): Unit = {
       // Clean up all remaining registered runners.
       cleanerCache.keySet().asScala.foreach(cleanupStreamingRunner(_))
+      // Drop the listener as well; close() does not remove it otherwise.
+      removeListenerIfRegistered()
     }
 
     private def cleanupStreamingRunner(key: CacheKey): Unit = {
@@ -279,6 +322,10 @@ object StreamingForeachBatchHelper extends Logging {
         .toMap
     }
 
-    private[connect] def listenerForTesting: StreamingQueryListener = streamingListener
+    // Reads the listener under the same lock that guards registration/removal so concurrent tests
+    // see a consistent value rather than a stale/torn read of the field.
+    private[connect] def listenerForTesting: StreamingQueryListener = synchronized {
+      streamingListener
+    }
   }
 }
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/PreSharedKeyAuthenticationInterceptor.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/PreSharedKeyAuthenticationInterceptor.scala
index 5d7cc65358eb3..b997f9d0d910b 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/PreSharedKeyAuthenticationInterceptor.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/PreSharedKeyAuthenticationInterceptor.scala
@@ -17,6 +17,9 @@
 
 package org.apache.spark.sql.connect.service
 
+import java.nio.charset.StandardCharsets.UTF_8
+import java.security.MessageDigest
+
 import io.grpc.{Metadata, ServerCall, ServerCallHandler, ServerInterceptor, Status}
 
 class PreSharedKeyAuthenticationInterceptor(token: String) extends ServerInterceptor {
@@ -36,7 +39,9 @@ class PreSharedKeyAuthenticationInterceptor(token: String) extends ServerInterce
       val status = Status.UNAUTHENTICATED.withDescription("No authentication token provided")
       call.close(status, new Metadata())
       new ServerCall.Listener[ReqT]() {}
-    } else if (authHeaderValue != expectedValue) {
+    } else if (!MessageDigest.isEqual(
+        authHeaderValue.getBytes(UTF_8),
+        expectedValue.getBytes(UTF_8))) {
       val status = Status.UNAUTHENTICATED.withDescription("Invalid authentication token")
       call.close(status, new Metadata())
       new ServerCall.Listener[ReqT]() {}
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
index 307416a659f7a..2276230545e67 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala
@@ -89,6 +89,13 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio
   // Set only by close(), and only once.
   @volatile private var closedTimeMs: Option[Long] = None
 
+  /**
+   * Whether this session is closing or already closed. closedTimeMs is set at the very beginning
+   * of [[close]], before any session resources (e.g. running streaming queries) are cleaned up,
+   * so this can be used to detect a session shutdown that races with newly started operations.
+   */
+  private[connect] def isClosing: Boolean = closedTimeMs.isDefined
+
   // Custom timeout after a session expires due to inactivity.
   // Used by SparkConnectSessionManager instead of default timeout if set.
   // Setting it to -1 indicated forever.
diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala
index 8ff13f5afe191..17f94f56f11c1 100644
--- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala
+++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.connect.service
 
 import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap, Executors, ScheduledExecutorService, TimeUnit}
-import java.util.concurrent.atomic.AtomicReference
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference}
 
 import scala.collection.mutable
 import scala.concurrent.{ExecutionContext, Future}
@@ -59,13 +59,20 @@ private[connect] class SparkConnectStreamingQueryCache(
       query: StreamingQuery,
       tags: Set[String],
       operationId: String): Unit = {
+    // If the query is already inactive by the time it is registered (e.g. a Trigger.AvailableNow
+    // query that already finished, or a query that was stopped right after start()), set its
+    // expiry time immediately so that it is reaped on the regular schedule, instead of lingering
+    // in the cache as a falsely "active" entry until a later maintenance cycle notices it stopped.
+    val expiresAtMs =
+      if (query.isActive) None
+      else Some(clock.getTimeMillis() + stoppedQueryInactivityTimeout.toMillis)
     val value = QueryCacheValue(
       userId = sessionHolder.userId,
       sessionId = sessionHolder.sessionId,
       session = sessionHolder.session,
       query = query,
       operationId = operationId,
-      expiresAtMs = None)
+      expiresAtMs = expiresAtMs)
 
     val queryKey = QueryCacheKey(query.id.toString, query.runId.toString)
     tags.foreach { tag => addTaggedQuery(tag, queryKey) }
@@ -86,6 +93,66 @@ private[connect] class SparkConnectStreamingQueryCache(
       })
 
     schedulePeriodicChecks() // Start the scheduler thread if it has not been started.
+
+    // Guard against a race with session shutdown. SessionHolder.close() stops all of a session's
+    // streaming queries through cleanupRunningQueries(), which iterates over this cache. A query
+    // registered *after* that iteration would otherwise be missed and left running, holding a
+    // strong reference to the now-closed session so that the driver can never exit.
+    //
+    // We publish the entry with queryCache.compute() *first* and only then read
+    // sessionHolder.isClosing; close() writes the volatile closedTimeMs *before* it iterates this
+    // cache in cleanupRunningQueries(). Because both the cache publish and closedTimeMs are
+    // volatile, at least one side always observes the other:
+    //   - if the cleanup forEach observes our entry, cleanupRunningQueries() stops the query;
+    //   - otherwise the forEach missed it, which means our compute() linearized *after* close()'s
+    //     read of that key's bin. Since closedTimeMs was written before that bin read (program
+    //     order in close()) and our compute() precedes the isClosing read (program order here),
+    //     transitivity guarantees we observe isClosing == true and stop the query ourselves.
+    // Either one or both sides stop the query. StreamingQuery.stop() and the cache removal below
+    // are idempotent and isActive-guarded, so both sides firing is harmless.
+    if (sessionHolder.isClosing) {
+      logWarning(
+        log"Stopping streaming query registered for a closing session. " +
+          log"Query Id: ${MDC(QUERY_ID, query.id)}, " +
+          log"runId: ${MDC(QUERY_RUN_ID, query.runId)}, " +
+          log"session ${MDC(SESSION_ID, sessionHolder.sessionId)}.")
+      // Stop asynchronously (stop() may block) and drop the cache entry only after the query has
+      // actually been stopped. Removing it before the stop succeeds would discard the only
+      // server-side handle to a query that might still be running, re-introducing the leak this
+      // guards against. If the stop fails we keep the entry cached so periodicMaintenance can reap
+      // it once the query becomes inactive (and so cleanupRunningQueries can still find it).
+      Future {
+        try {
+          if (query.isActive) query.stop()
+          // Drop only the entry we inserted, matched by query identity. Identity (rather than
+          // queryCache.remove(queryKey, value) by case-class equality) ensures we still remove the
+          // entry if the maintenance thread concurrently rewrote its expiresAtMs after observing
+          // the just-stopped query, while still never evicting a later replacement for the same key
+          // (queryCache.compute allows replacement, though it is not expected).
+          val removed = new AtomicBoolean(false)
+          queryCache.computeIfPresent(
+            queryKey,
+            (_, current) => {
+              if (current.query eq query) {
+                removed.set(true)
+                null
+              } else {
+                current
+              }
+            })
+          if (removed.get()) {
+            tags.foreach { tag => removeTaggedQuery(tag, queryKey) }
+          }
+        } catch {
+          case NonFatal(ex) =>
+            logWarning(
+              log"Failed to stop streaming query ${MDC(QUERY_ID, query.id)} " +
+                log"runId: ${MDC(QUERY_RUN_ID, query.runId)} " +
+                log"for a closing session; leaving it cached for later cleanup.",
+              ex)
+        }
+      }(ExecutionContext.global)
+    }
   }
 
   /**
@@ -227,6 +294,15 @@ private[connect] class SparkConnectStreamingQueryCache(
       })
   }
 
+  private def removeTaggedQuery(tag: String, queryKey: QueryCacheKey): Unit = {
+    taggedQueries.computeIfPresent(
+      tag,
+      (k, v) => {
+        // removeKey returns true once the set is empty; drop the tag entry in that case.
+        if (v.removeKey(queryKey)) null else v
+      })
+  }
+
   /**
    * Periodic maintenance task to do the following:
    *   - Update status of query if it is inactive. Sets an expiry time for such queries
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala
new file mode 100644
index 0000000000000..1a31e5f8ac1a3
--- /dev/null
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connect
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession}
+import org.apache.spark.sql.connector.{DSv2CacheTableReadTests, DSv2IncrementallyConstructedQueryTests, DSv2RepeatedTableAccessTests, DSv2TempViewWithStoredPlanTests}
+import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMemoryTableCatalog, NullTableIdAndNullColumnIdInMemoryTableCatalog, NullTableIdInMemoryTableCatalog, TableCatalog}
+
+/**
+ * Connect-mode counterpart of [[org.apache.spark.sql.connector.DataSourceV2DataFrameSuite]].
+ *
+ * Runs DSv2 temp view tests ([[DSv2TempViewWithStoredPlanTests]]), repeated table access tests
+ * ([[DSv2RepeatedTableAccessTests]]), incrementally constructed query tests
+ * ([[DSv2IncrementallyConstructedQueryTests]]), and CACHE TABLE read tests
+ * ([[DSv2CacheTableReadTests]]) under Spark Connect. All test logic lives in the shared traits;
+ * this class only provides the Connect-specific session, catalog access, and result comparison.
+ */
+class DataSourceV2DataFrameConnectSuite
+    extends SparkConnectServerTest
+    with DSv2TempViewWithStoredPlanTests
+    with DSv2RepeatedTableAccessTests
+    with DSv2IncrementallyConstructedQueryTests
+    with DSv2CacheTableReadTests {
+
+  override def sparkConf: SparkConf = super.sparkConf
+    .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.testcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.cachingcat", classOf[CachingInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.cachingcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.nullidcat", classOf[NullTableIdInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.nullidcat.copyOnLoad", "true")
+    .set(
+      "spark.sql.catalog.nullbothidscat",
+      classOf[NullTableIdAndNullColumnIdInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.nullbothidscat.copyOnLoad", "true")
+
+  override protected def testPrefix: String = "[connect] "
+  override protected def isConnect: Boolean = true
+
+  override protected def withTestSession(fn: SparkSession => Unit): Unit =
+    withSession(fn)
+
+  // Cannot use QueryTest.checkAnswer directly because it accesses df.logicalPlan,
+  // df.queryExecution, and df.materializedRdd, which are not available on Connect *client*
+  // DataFrames (they throw ConnectClientUnsupportedErrors). Note: checkAnswer IS usable from
+  // Connect server tests that operate on classic server-side DataFrames, but in this suite
+  // `df` is a Connect client DataFrame returned by session.table() / session.sql().
+  // Instead, collect the rows and delegate to QueryTest.sameRows, which is the same
+  // value-based, order-agnostic comparison that checkAnswer uses internally.
+  override protected def checkRows(df: => DataFrame, expected: Seq[Row]): Unit =
+    QueryTest.sameRows(expected, df.collect().toSeq).foreach(msg => fail(msg))
+
+  override protected def getTableCatalog[C <: TableCatalog: ClassTag](
+      session: SparkSession,
+      catalogName: String): C = {
+    val serverSession = getServerSession(session)
+    val catalog = serverSession.sessionState.catalogManager.catalog(catalogName)
+    val ct = implicitly[ClassTag[C]]
+    require(
+      ct.runtimeClass.isInstance(catalog),
+      s"Expected ${ct.runtimeClass.getName} but got ${catalog.getClass.getName}")
+    catalog.asInstanceOf[C]
+  }
+
+  // No explicit clearCache() for cachingcat is needed here, unlike the classic suite.
+  // Each withSession call creates a freshly isolated SparkSession on the server side
+  // (via SparkConnectSessionManager.newIsolatedSession), and afterEach invalidates all
+  // sessions, so the CachingInMemoryTableCatalog instance is per-test.
+  override protected def withTestTableAndViews(
+      session: SparkSession,
+      table: String,
+      views: Seq[String] = Seq.empty)(fn: => Unit): Unit = {
+    try { fn }
+    finally {
+      views.foreach(v => session.sql(s"DROP VIEW IF EXISTS $v").collect())
+      session.sql(s"DROP TABLE IF EXISTS $table").collect()
+    }
+  }
+}
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala
index 506a75ca3b4c3..bf8469f19ee75 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala
@@ -28,14 +28,14 @@ import org.apache.spark.{SparkConf, SparkFunSuite}
 import org.apache.spark.connect.proto
 import org.apache.spark.internal.LogKeys.PATH
 import org.apache.spark.sql.catalyst.{catalog, QueryPlanningTracker}
-import org.apache.spark.sql.catalyst.analysis.{caseSensitiveResolution, Analyzer, FunctionRegistry, Resolver, TableFunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.{caseSensitiveResolution, Analyzer, FunctionRegistry, RelationCache, Resolver, TableFunctionRegistry}
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.optimizer.{ReplaceExpressions, RewriteWithExpression}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.connect.config.Connect
 import org.apache.spark.sql.connect.planner.SparkConnectPlanner
-import org.apache.spark.sql.connector.catalog.{CatalogManager, Column, Identifier, InMemoryChangelogCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogManager, Column, DefaultCatalogManager, Identifier, InMemoryChangelogCatalog}
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
@@ -133,9 +133,24 @@ class ProtoToParsedPlanTestSuite
   protected val goldenFilePath: Path = suiteBaseResourcePath.resolve("explain-results")
   private val emptyProps: util.Map[String, String] = util.Collections.emptyMap()
 
-  private val analyzer = {
+  /**
+   * Isolated from [[SharedSparkSession]] so PATH / session path settings do not affect catalog.
+   * Cloned from the test session's conf so all sparkConf overrides (ANSI, alias config, etc.) are
+   * preserved automatically; only the genuine isolation knob is overridden explicitly.
+   */
+  private lazy val analyzerIsolationConf: SQLConf = {
+    val c = spark.sessionState.conf.clone()
+    c.setConf(SQLConf.PATH_ENABLED, false)
+    c
+  }
+
+  private lazy val analyzer = {
     val inMemoryCatalog = new InMemoryChangelogCatalog
-    inMemoryCatalog.initialize("primary", CaseInsensitiveStringMap.empty())
+    // Name must match [[CatalogManager.SESSION_CATALOG_NAME]]: path entries use
+    // [[currentCatalog.name()]], then resolution calls [[catalogManager.catalog]] on that segment.
+    inMemoryCatalog.initialize(
+      CatalogManager.SESSION_CATALOG_NAME,
+      CaseInsensitiveStringMap.empty())
     inMemoryCatalog.createNamespace(Array("tempdb"), emptyProps)
     inMemoryCatalog.createTable(
       Identifier.of(Array("tempdb"), "myTable"),
@@ -148,16 +163,18 @@ class ProtoToParsedPlanTestSuite
       Array.empty[Transform],
       emptyProps)
 
-    val catalogManager = new CatalogManager(
+    val catalogManager = new DefaultCatalogManager(
       inMemoryCatalog,
       new SessionCatalog(
         new catalog.InMemoryCatalog(),
         FunctionRegistry.builtin,
         TableFunctionRegistry.builtin))
-    catalogManager.setCurrentCatalog("primary")
+    // Do not call setCurrentCatalog("primary"): that loads a separate plugin via
+    // Catalogs.load("primary", conf) instead of using defaultSessionCatalog (inMemoryCatalog).
+    // Leave current catalog as default spark_catalog so v2SessionCatalog returns inMemoryCatalog.
     catalogManager.setCurrentNamespace(Array("tempdb"))
 
-    new Analyzer(catalogManager) {
+    new Analyzer(catalogManager, RelationCache.empty, Some(analyzerIsolationConf)) {
       override def resolver: Resolver = caseSensitiveResolution
     }
   }
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/pipelines/PythonPipelineSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/pipelines/PythonPipelineSuite.scala
index fd05b0cc357eb..8752f61d486f2 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/pipelines/PythonPipelineSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/pipelines/PythonPipelineSuite.scala
@@ -29,16 +29,19 @@ import scala.util.Try
 import org.scalactic.source.Position
 import org.scalatest.Tag
 
+import org.apache.spark.SparkConf
 import org.apache.spark.api.python.PythonUtils
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.classic.ColumnConversions._
 import org.apache.spark.sql.connect.PythonTestDepsChecker
 import org.apache.spark.sql.connect.service.SparkConnectService
-import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}
+import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTableCatalog, TableCatalog}
 import org.apache.spark.sql.pipelines.Language.Python
+import org.apache.spark.sql.pipelines.autocdc.{ColumnSelection, ScdType, UnqualifiedColumnName}
 import org.apache.spark.sql.pipelines.common.FlowStatus
-import org.apache.spark.sql.pipelines.graph.{DataflowGraph, PipelineUpdateContextImpl, QueryOrigin, QueryOriginType}
-import org.apache.spark.sql.pipelines.logging.EventLevel
+import org.apache.spark.sql.pipelines.graph.{AutoCdcFlow, AutoCdcMergeFlow, DataflowGraph, PipelineUpdateContextImpl, QueryOrigin, QueryOriginType}
+import org.apache.spark.sql.pipelines.logging.{EventLevel, PipelineEvent}
 import org.apache.spark.sql.pipelines.utils.{EventVerificationTestHelpers, TestPipelineUpdateContextMixin}
 import org.apache.spark.sql.types.StructType
 
@@ -51,10 +54,24 @@ class PythonPipelineSuite
     with TestPipelineUpdateContextMixin
     with EventVerificationTestHelpers {
 
-  def buildGraph(pythonText: String): DataflowGraph = {
+  // Register a V2 in-memory catalog so AutoCDC tests can exercise pipeline-default-catalog
+  // inheritance against a name that is never the session default `spark_catalog`. The V2 in-memory
+  // catalog doesn't support streaming reads, but the AutoCDC tests that touch it only run graph
+  // resolution -- not pipeline execution -- so this is sufficient.
+  override def sparkConf: SparkConf = super.sparkConf
+    .set("spark.sql.catalog.my_catalog", classOf[InMemoryTableCatalog].getName)
+
+  def buildGraph(
+      pythonText: String,
+      defaultCatalog: Option[String] = None,
+      defaultDatabase: Option[String] = None,
+      setupSql: Option[String] = None): DataflowGraph = {
     val indentedPythonText = pythonText.linesIterator.map("        " + _).mkString("\n")
     // create a unique identifier to allow identifying the session and dataflow graph
     val customSessionIdentifier = UUID.randomUUID().toString
+    val defaultCatalogPyExpr = defaultCatalog.map(c => s""""$c"""").getOrElse("None")
+    val defaultDatabasePyExpr = defaultDatabase.map(d => s""""$d"""").getOrElse("None")
+    val setupSqlLine = setupSql.map(stmt => s"""spark.sql(\"\"\"$stmt\"\"\")""").getOrElse("")
     val pythonCode =
       s"""
          |from pyspark.sql import SparkSession
@@ -76,10 +93,12 @@ class PythonPipelineSuite
          |    .config("spark.custom.identifier", "$customSessionIdentifier") \\
          |    .create()
          |
+         |$setupSqlLine
+         |
          |dataflow_graph_id = create_dataflow_graph(
          |    spark,
-         |    default_catalog=None,
-         |    default_database=None,
+         |    default_catalog=$defaultCatalogPyExpr,
+         |    default_database=$defaultDatabasePyExpr,
          |    sql_conf={},
          |)
          |
@@ -118,6 +137,24 @@ class PythonPipelineSuite
     TableIdentifier(catalog = Option("spark_catalog"), database = Option("default"), table = name)
   }
 
+  /**
+   * Matches a flow progress event's source code location against `expected`, tolerating the
+   * Python-version-dependent line number reported for bare decorators (e.g.
+   * `@dp.materialized_view` with no arguments).
+   *
+   * On Python <= 3.10 a bare decorator reports the source line of the decorated `def`, one line
+   * below the `@decorator` line reported on Python 3.11+. Upstream CI runs Python 3.12 while this
+   * fork's connect CI runs Python 3.10, so accept either the `@decorator` line (`expected.line`)
+   * or the `def` line one below it. Decorators with arguments are unaffected and always report
+   * the `@decorator` line.
+   */
+  private def sourceCodeLocationMatches(event: PipelineEvent, expected: QueryOrigin): Boolean = {
+    event.origin.sourceCodeLocation.exists { actual =>
+      actual.line.exists(l => expected.line.contains(l) || expected.line.contains(l - 1)) &&
+      actual.copy(line = expected.line) == expected
+    }
+  }
+
   test("basic") {
     val graph = buildGraph("""
         |@dp.table
@@ -151,7 +188,7 @@ class PythonPipelineSuite
           QueryOrigin(
             language = Option(Python()),
             filePath = Option("<string>"),
-            line = Option(34),
+            line = Option(36),
             objectName = Option("spark_catalog.default.table1"),
             objectType = Option(QueryOriginType.Flow.toString))),
       errorChecker = ex =>
@@ -160,7 +197,10 @@ class PythonPipelineSuite
       expectedEventLevel = EventLevel.WARN)
   }
 
-  test("flow progress events have correct python source code location") {
+  // Uses testRetry: this exercises live micro-batch streaming flows and is occasionally flaky
+  // under load (e.g. "Race while writing batch 0"). Retry the single test in-process rather than
+  // failing the whole job; ScalaTest suites do not honor surefire's rerunFailingTestsCount.
+  testRetry("flow progress events have correct python source code location") {
     val unresolvedGraph = buildGraph(pythonText = """
         |@dp.table(
         | comment = 'my table'
@@ -199,11 +239,12 @@ class PythonPipelineSuite
         identifier = graphIdentifier("mv2"),
         expectedFlowStatus = flowStatus,
         cond = flowProgressEvent =>
-          flowProgressEvent.origin.sourceCodeLocation == Option(
+          sourceCodeLocationMatches(
+            flowProgressEvent,
             QueryOrigin(
               language = Option(Python()),
               filePath = Option("<string>"),
-              line = Option(40),
+              line = Option(42),
               objectName = Option("spark_catalog.default.mv2"),
               objectType = Option(QueryOriginType.Flow.toString))),
         expectedEventLevel = EventLevel.INFO)
@@ -213,11 +254,12 @@ class PythonPipelineSuite
         identifier = graphIdentifier("mv"),
         expectedFlowStatus = flowStatus,
         cond = flowProgressEvent =>
-          flowProgressEvent.origin.sourceCodeLocation == Option(
+          sourceCodeLocationMatches(
+            flowProgressEvent,
             QueryOrigin(
               language = Option(Python()),
               filePath = Option("<string>"),
-              line = Option(44),
+              line = Option(46),
               objectName = Option("spark_catalog.default.mv"),
               objectType = Option(QueryOriginType.Flow.toString))),
         expectedEventLevel = EventLevel.INFO)
@@ -231,11 +273,12 @@ class PythonPipelineSuite
           identifier = graphIdentifier("table1"),
           expectedFlowStatus = flowStatus,
           cond = flowProgressEvent =>
-            flowProgressEvent.origin.sourceCodeLocation == Option(
+            sourceCodeLocationMatches(
+              flowProgressEvent,
               QueryOrigin(
                 language = Option(Python()),
                 filePath = Option("<string>"),
-                line = Option(34),
+                line = Option(36),
                 objectName = Option("spark_catalog.default.table1"),
                 objectType = Option(QueryOriginType.Flow.toString))),
           expectedEventLevel = EventLevel.INFO)
@@ -245,11 +288,12 @@ class PythonPipelineSuite
           identifier = graphIdentifier("standalone_flow1"),
           expectedFlowStatus = flowStatus,
           cond = flowProgressEvent =>
-            flowProgressEvent.origin.sourceCodeLocation == Option(
+            sourceCodeLocationMatches(
+              flowProgressEvent,
               QueryOrigin(
                 language = Option(Python()),
                 filePath = Option("<string>"),
-                line = Option(49),
+                line = Option(51),
                 objectName = Option("spark_catalog.default.standalone_flow1"),
                 objectType = Option(QueryOriginType.Flow.toString))),
           expectedEventLevel = EventLevel.INFO)
@@ -935,6 +979,367 @@ class PythonPipelineSuite
     assert(ex.getMessage.contains("table_with_wrong_struct_schema"))
   }
 
+  private def buildAutoCdcFlow(pipelineSource: String): AutoCdcFlow = {
+    val graph = buildGraph(pipelineSource)
+    graph.flows
+      .collectFirst { case f: AutoCdcFlow => f }
+      .getOrElse(fail(s"Expected an AutoCdcFlow in the graph, got: ${graph.flows}"))
+  }
+
+  test("AutoCDC API: minimal flow registers an AutoCdcFlow with default name and SCD1 default") {
+    val flow = buildAutoCdcFlow("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |)
+        |""".stripMargin)
+
+    assert(flow.identifier == graphIdentifier("target"))
+    assert(flow.destinationIdentifier == graphIdentifier("target"))
+    assert(flow.changeArgs.keys == Seq(UnqualifiedColumnName("value")))
+    assert(flow.changeArgs.sequencing.expr.sql == "timestamp")
+    assert(flow.changeArgs.deleteCondition.isEmpty)
+    assert(flow.changeArgs.columnSelection.isEmpty)
+    assert(flow.changeArgs.storedAsScdType == ScdType.Type1)
+  }
+
+  test("AutoCDC API: composite keys are forwarded to ChangeArgs in order") {
+    val flow = buildAutoCdcFlow("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value", "timestamp"],
+        |    sequence_by = "timestamp",
+        |)
+        |""".stripMargin)
+
+    assert(
+      flow.changeArgs.keys ==
+        Seq(UnqualifiedColumnName("value"), UnqualifiedColumnName("timestamp")))
+  }
+
+  test("AutoCDC API: apply_as_deletes is forwarded as a delete condition column") {
+    val flow = buildAutoCdcFlow("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |    apply_as_deletes = "value % 2 = 0",
+        |)
+        |""".stripMargin)
+
+    val deleteCondition = flow.changeArgs.deleteCondition.getOrElse(
+      fail("expected apply_as_deletes to populate deleteCondition"))
+    assert(deleteCondition.expr.sql.contains("value"))
+    assert(deleteCondition.expr.sql.contains("0"))
+  }
+
+  test("AutoCDC API: column_list is forwarded as IncludeColumns") {
+    val flow = buildAutoCdcFlow("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |    column_list = ["value", "timestamp"],
+        |)
+        |""".stripMargin)
+
+    assert(
+      flow.changeArgs.columnSelection.contains(ColumnSelection.IncludeColumns(
+        Seq(UnqualifiedColumnName("value"), UnqualifiedColumnName("timestamp")))))
+  }
+
+  test("AutoCDC API: except_column_list is forwarded as ExcludeColumns") {
+    val flow = buildAutoCdcFlow("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |    except_column_list = ["timestamp"],
+        |)
+        |""".stripMargin)
+
+    assert(
+      flow.changeArgs.columnSelection.contains(
+        ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName("timestamp")))))
+  }
+
+  test("AutoCDC API: explicit `name` is honored as the flow identifier") {
+    val flow = buildAutoCdcFlow("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |    name = "my_flow",
+        |)
+        |""".stripMargin)
+
+    assert(flow.identifier == graphIdentifier("my_flow"))
+    assert(flow.destinationIdentifier == graphIdentifier("target"))
+  }
+
+  test("AutoCDC API: multi-part `keys` column is rejected at flow registration") {
+    val ex = intercept[RuntimeException] {
+      buildAutoCdcFlow("""
+          |@dp.table
+          |def src():
+          |  return spark.readStream.format("rate").load()
+          |
+          |dp.create_streaming_table("target")
+          |
+          |dp.create_auto_cdc_flow(
+          |    target = "target",
+          |    source = "src",
+          |    keys = ["a.b"],
+          |    sequence_by = "timestamp",
+          |)
+          |""".stripMargin)
+    }
+    assert(ex.getMessage.contains("AUTOCDC_MULTIPART_COLUMN_IDENTIFIER"))
+  }
+
+  test("AutoCDC API: multi-part `column_list` entry is rejected at flow registration") {
+    val ex = intercept[RuntimeException] {
+      buildAutoCdcFlow("""
+          |@dp.table
+          |def src():
+          |  return spark.readStream.format("rate").load()
+          |
+          |dp.create_streaming_table("target")
+          |
+          |dp.create_auto_cdc_flow(
+          |    target = "target",
+          |    source = "src",
+          |    keys = ["value"],
+          |    sequence_by = "timestamp",
+          |    column_list = ["nested.field"],
+          |)
+          |""".stripMargin)
+    }
+    assert(ex.getMessage.contains("AUTOCDC_MULTIPART_COLUMN_IDENTIFIER"))
+  }
+
+  test("AutoCDC API: Column-object form of keys/sequence_by/apply_as_deletes is honored") {
+    val flow = buildAutoCdcFlow("""
+        |from pyspark.sql.functions import col, expr
+        |
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = [col("value")],
+        |    sequence_by = col("timestamp"),
+        |    apply_as_deletes = expr("value % 2 = 0"),
+        |)
+        |""".stripMargin)
+
+    assert(flow.changeArgs.keys == Seq(UnqualifiedColumnName("value")))
+    assert(flow.changeArgs.sequencing.expr.sql == "timestamp")
+    val deleteCondition = flow.changeArgs.deleteCondition.getOrElse(
+      fail("expected apply_as_deletes to populate deleteCondition"))
+    assert(deleteCondition.expr.sql.contains("value"))
+    assert(deleteCondition.expr.sql.contains("0"))
+  }
+
+  test("AutoCDC API: graph resolves with the source streaming table as the flow's input") {
+    val graph = buildGraph("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |)
+        |""".stripMargin).resolve()
+
+    val resolvedFlow = graph.resolvedFlow(graphIdentifier("target"))
+    assert(resolvedFlow.inputs == Set(graphIdentifier("src")))
+  }
+
+  test("AutoCDC API: single-part `source` inherits the pipeline's default catalog and database") {
+    // Use `my_catalog` (registered in `sparkConf`) so the pipeline-default catalog differs from
+    // the session default (`spark_catalog`), and a non-default namespace `my_db` so the
+    // pipeline-default database differs from the session default (`default`). The CREATE NAMESPACE
+    // runs on the same Connect session that subsequently creates the dataflow graph, so the
+    // namespace is visible to that session's per-session V2 catalog instance.
+    val graph = buildGraph(
+      """
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |)
+        |""".stripMargin,
+      defaultCatalog = Some("my_catalog"),
+      defaultDatabase = Some("my_db"),
+      setupSql = Some("CREATE NAMESPACE IF NOT EXISTS my_catalog.my_db")).resolve()
+
+    val resolvedFlow =
+      graph.resolvedFlow(TableIdentifier("target", Some("my_db"), Some("my_catalog")))
+    assert(
+      resolvedFlow.inputs ==
+        Set(TableIdentifier("src", Some("my_db"), Some("my_catalog"))))
+  }
+
+  test("AutoCDC API: multi-part `source` resolves to the corresponding qualified dataset") {
+    val graph = buildGraph("""
+        |@dp.table(name = "some_catalog.some_schema.src")
+        |def irrelevant():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table(name = "some_catalog.some_schema.target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "some_catalog.some_schema.target",
+        |    source = "some_catalog.some_schema.src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |)
+        |""".stripMargin).resolve()
+
+    val targetIdent = TableIdentifier("target", Some("some_schema"), Some("some_catalog"))
+    val srcIdent = TableIdentifier("src", Some("some_schema"), Some("some_catalog"))
+    val resolvedFlow = graph.resolvedFlow(targetIdent)
+    assert(resolvedFlow.inputs == Set(srcIdent))
+  }
+
+  test("AutoCDC API: non-attribute expression in keys is rejected") {
+    val ex = intercept[RuntimeException] {
+      buildGraph("""
+          |from pyspark.sql.functions import expr
+          |
+          |@dp.table
+          |def src():
+          |  return spark.readStream.format("rate").load()
+          |
+          |dp.create_streaming_table("target")
+          |
+          |dp.create_auto_cdc_flow(
+          |    target = "target",
+          |    source = "src",
+          |    keys = [expr("value + 1")],
+          |    sequence_by = "timestamp",
+          |)
+          |""".stripMargin)
+    }
+    assert(ex.getMessage.contains("AUTOCDC_NON_COLUMN_IDENTIFIER"))
+  }
+
+  test("AutoCDC API: specifying both column_list and except_column_list is rejected") {
+    // The Python create_auto_cdc_flow API does not currently enforce the "at most one" contract
+    // client-side, so the proto carries both lists to the server, where the structured error is
+    // raised. If/when a Python-side check is added, this test guards against the server-side
+    // defense being silently bypassed.
+    val ex = intercept[RuntimeException] {
+      buildGraph("""
+          |@dp.table
+          |def src():
+          |  return spark.readStream.format("rate").load()
+          |
+          |dp.create_streaming_table("target")
+          |
+          |dp.create_auto_cdc_flow(
+          |    target = "target",
+          |    source = "src",
+          |    keys = ["value"],
+          |    sequence_by = "timestamp",
+          |    column_list = ["value"],
+          |    except_column_list = ["timestamp"],
+          |)
+          |""".stripMargin)
+    }
+    assert(ex.getMessage.contains("AUTOCDC_BOTH_COLUMN_LIST_AND_EXCEPT_COLUMN_LIST"))
+  }
+
+  test("AutoCDC API: registered flow survives graph resolution and validation end-to-end") {
+    val graph = buildGraph("""
+        |@dp.table
+        |def src():
+        |  return spark.readStream.format("rate").load()
+        |
+        |dp.create_streaming_table("target")
+        |
+        |dp.create_auto_cdc_flow(
+        |    target = "target",
+        |    source = "src",
+        |    keys = ["value"],
+        |    sequence_by = "timestamp",
+        |    apply_as_deletes = "value % 2 = 0",
+        |    column_list = ["value", "timestamp"],
+        |)
+        |""".stripMargin).resolve().validate()
+
+    val resolvedFlow = graph.resolvedFlow(graphIdentifier("target"))
+    assert(resolvedFlow.isInstanceOf[AutoCdcMergeFlow])
+    val mergeFlow = resolvedFlow.asInstanceOf[AutoCdcMergeFlow]
+    assert(mergeFlow.changeArgs.keys == Seq(UnqualifiedColumnName("value")))
+    assert(mergeFlow.changeArgs.sequencing.expr.sql == "timestamp")
+    assert(mergeFlow.changeArgs.deleteCondition.isDefined)
+    assert(
+      mergeFlow.changeArgs.columnSelection.contains(ColumnSelection.IncludeColumns(
+        Seq(UnqualifiedColumnName("value"), UnqualifiedColumnName("timestamp")))))
+    assert(mergeFlow.changeArgs.storedAsScdType == ScdType.Type1)
+  }
+
   /**
    * Executes Python code in a separate process and returns the exit code.
    *
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelperSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelperSuite.scala
index 657be7b7954b5..6a9656e5d6f8e 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelperSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelperSuite.scala
@@ -17,19 +17,31 @@
 package org.apache.spark.sql.connect.planner
 
 import java.util.UUID
+import java.util.concurrent.CountDownLatch
 
+import org.mockito.Mockito.atLeastOnce
 import org.mockito.Mockito.times
 import org.mockito.Mockito.verify
 import org.mockito.Mockito.when
 import org.scalatestplus.mockito.MockitoSugar
 
 import org.apache.spark.sql.connect.SparkConnectTestUtils
+import org.apache.spark.sql.connect.service.SessionHolder
 import org.apache.spark.sql.streaming.StreamingQuery
 import org.apache.spark.sql.streaming.StreamingQueryListener
 import org.apache.spark.sql.test.SharedSparkSession
 
 class StreamingForeachBatchHelperSuite extends SharedSparkSession with MockitoSugar {
 
+  // A session holder that is NOT registered in the global SparkConnectService.sessionManager
+  // (unlike SparkConnectTestUtils.createDummySessionHolder). The closing-session tests below call
+  // sessionHolder.close() directly, bypassing the manager; a holder registered in the manager's
+  // store at that point would stay there as a closed entry, and a later suite's
+  // invalidateAllSessions() would fail closing it a second time (SESSION_ALREADY_CLOSED).
+  private def unregisteredSessionHolder(): SessionHolder = {
+    SessionHolder(userId = "testUser", sessionId = UUID.randomUUID().toString, session = spark)
+  }
+
   private def mockQuery(): StreamingQuery = {
     val query = mock[StreamingQuery]
     val (queryId, runId) = (UUID.randomUUID(), UUID.randomUUID())
@@ -78,4 +90,106 @@ class StreamingForeachBatchHelperSuite extends SharedSparkSession with MockitoSu
     // No more entries left in it now.
     assert(cache.listEntriesForTesting().isEmpty)
   }
+
+  test("CleanerCache: a runner registered for a closing session is cleaned up immediately") {
+    // Mirrors the SparkConnectStreamingQueryCache shutdown-race guard. A runner registered after
+    // SessionHolder.close() has already run cleanUpAll() (for a query started concurrently with
+    // close()) would otherwise be missed by both reapers and strand a Python worker.
+    val cleaner = mock[AutoCloseable]
+    val query = mockQuery()
+    val sessionHolder = unregisteredSessionHolder()
+    val cache = new StreamingForeachBatchHelper.CleanerCache(sessionHolder)
+
+    // Mark the session as closing before the runner is registered.
+    sessionHolder.close()
+    assert(sessionHolder.isClosing)
+
+    val listenersBefore = spark.streams.listListeners().length
+    cache.registerCleanerForQuery(query, cleaner)
+
+    // The runner must not be stranded: it is closed and never cached.
+    verify(cleaner, times(1)).close()
+    assert(cache.listEntriesForTesting().isEmpty)
+    // The fast path must not register a (leaking) listener on the closing session's streams.
+    assert(spark.streams.listListeners().length == listenersBefore)
+  }
+
+  test("CleanerCache.cleanUpAll unregisters the streaming listener") {
+    // close() does not remove the StreamingRunnerCleanerListener (it is not tracked in the
+    // session's listenerCache), so cleanUpAll() must drop it; otherwise the listener keeps the
+    // cache / session reachable after the session is closed.
+    val cleaner = mock[AutoCloseable]
+    val query = mockQuery()
+    val cache = new StreamingForeachBatchHelper.CleanerCache(
+      SparkConnectTestUtils.createDummySessionHolder(spark))
+
+    cache.registerCleanerForQuery(query, cleaner)
+    val listener = cache.listenerForTesting
+    assert(spark.streams.listListeners().contains(listener))
+
+    cache.cleanUpAll()
+
+    verify(cleaner, times(1)).close()
+    assert(!spark.streams.listListeners().contains(listener))
+  }
+
+  test("CleanerCache: listener is recoverable -- re-registered after cleanUpAll") {
+    // streamingListener is no longer a one-shot lazy val: after cleanUpAll() removes it, a later
+    // registration must re-add a working listener so the cache is safe to reuse.
+    val cache = new StreamingForeachBatchHelper.CleanerCache(
+      SparkConnectTestUtils.createDummySessionHolder(spark))
+
+    cache.registerCleanerForQuery(mockQuery(), mock[AutoCloseable])
+    val firstListener = cache.listenerForTesting
+    assert(spark.streams.listListeners().contains(firstListener))
+
+    cache.cleanUpAll()
+    assert(!spark.streams.listListeners().contains(firstListener))
+
+    // Reuse: a new registration re-registers a listener on session.streams.
+    cache.registerCleanerForQuery(mockQuery(), mock[AutoCloseable])
+    val secondListener = cache.listenerForTesting
+    assert(spark.streams.listListeners().contains(secondListener))
+
+    cache.cleanUpAll()
+    assert(!spark.streams.listListeners().contains(secondListener))
+  }
+
+  test("CleanerCache: registration racing with session shutdown strands no runner or listener") {
+    // Mirrors the SparkConnectStreamingQueryCache race test for the foreachBatch cleaner:
+    // registration runs concurrently with the shutdown sequence (close() marks the session closing,
+    // then cleanUpAll() reaps runners and the listener). Whatever the interleaving, the runner must
+    // be closed and no listener may be left registered on session.streams.
+    val baselineListeners = spark.streams.listListeners().length
+    val numIterations = 200
+    (1 to numIterations).foreach { _ =>
+      val cleaner = mock[AutoCloseable]
+      val query = mockQuery()
+      val sessionHolder = unregisteredSessionHolder()
+      val cache = new StreamingForeachBatchHelper.CleanerCache(sessionHolder)
+
+      val startLatch = new CountDownLatch(1)
+      val closeThread = new Thread(() => {
+        startLatch.await()
+        sessionHolder.close() // Marks the session closing.
+        cache.cleanUpAll() // Mirrors close()'s runner + listener reaping.
+      })
+      val registerThread = new Thread(() => {
+        startLatch.await()
+        cache.registerCleanerForQuery(query, cleaner)
+      })
+      closeThread.start()
+      registerThread.start()
+      startLatch.countDown()
+      closeThread.join()
+      registerThread.join()
+
+      // The runner must be closed by one of the paths: the fast path, the post-insert guard, or
+      // cleanUpAll(). registerCleanerForQuery and cleanUpAll are synchronous, so this is settled
+      // once both threads have joined.
+      verify(cleaner, atLeastOnce()).close()
+    }
+    // No iteration may leave a listener registered on the shared streams manager.
+    assert(spark.streams.listListeners().length == baselineListeners)
+  }
 }
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/AddArtifactsHandlerSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/AddArtifactsHandlerSuite.scala
index 75d7fdd4d8848..8094a084d3b65 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/AddArtifactsHandlerSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/AddArtifactsHandlerSuite.scala
@@ -48,6 +48,12 @@ class AddArtifactsHandlerSuite extends SharedSparkSession with ResourceHelper {
   private val sessionId = UUID.randomUUID.toString()
   private val sessionKey = SessionKey("c1", sessionId)
 
+  override def beforeEach(): Unit = {
+    super.beforeEach()
+    SparkConnectService.sessionManager.invalidateAllSessions()
+    SparkConnectService.sessionManager.initializeBaseSession(() => spark.newSession())
+  }
+
   class DummyStreamObserver(p: Promise[AddArtifactsResponse])
       extends StreamObserver[AddArtifactsResponse] {
     override def onNext(v: AddArtifactsResponse): Unit = p.success(v)
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHolderSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHolderSuite.scala
index 17402ab5ddb43..cff5f345d2573 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHolderSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHolderSuite.scala
@@ -19,11 +19,13 @@ package org.apache.spark.sql.connect.service
 
 import java.nio.charset.StandardCharsets
 import java.nio.file.Files
+import java.util.concurrent.{TimeoutException, TimeUnit}
 
 import scala.collection.mutable
 import scala.jdk.CollectionConverters._
 import scala.sys.process.Process
 import scala.util.Random
+import scala.util.control.NonFatal
 
 import com.google.common.collect.Lists
 import org.scalatest.time.SpanSugar._
@@ -37,8 +39,10 @@ import org.apache.spark.sql.connect.common.InvalidPlanInput
 import org.apache.spark.sql.connect.config.Connect
 import org.apache.spark.sql.connect.planner.{PythonStreamingQueryListener, SparkConnectPlanner, StreamingForeachBatchHelper}
 import org.apache.spark.sql.connect.planner.StreamingForeachBatchHelper.RunnerCleaner
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.pipelines.graph.{DataflowGraph, PipelineUpdateContextImpl}
 import org.apache.spark.sql.pipelines.logging.PipelineEvent
+import org.apache.spark.sql.streaming.StreamingQueryListener
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.util.ArrayImplicits._
 
@@ -228,15 +232,117 @@ class SparkConnectSessionHolderSuite extends SharedSparkSession {
     }
   }
 
-  test("python foreachBatch process: process terminates after query is stopped") {
-    // scalastyle:off assume
-    assume(IntegratedUDFTestUtils.shouldTestPandasUDFs)
-    assume(PythonTestDepsChecker.isConnectDepsAvailable)
-    // scalastyle:on assume
+  // Log and swallow best-effort cleanup failures so they do not mask a primary test
+  // failure. InterruptedException re-asserts the interrupt flag on the current thread;
+  // fatal errors (OOM, StackOverflow, LinkageError) propagate.
+  private def runQuietly(label: String, op: => Unit): Unit = {
+    try op
+    catch {
+      case _: InterruptedException => Thread.currentThread().interrupt()
+      case NonFatal(t) =>
+        // scalastyle:off println
+        println(s"===== $label suppressed ${t.getClass.getSimpleName}: ${t.getMessage} =====")
+      // scalastyle:on println
+    }
+  }
+
+  // Same semantics as SparkFunSuite.retry, but prints to stdout so retries show up in the
+  // GitHub Actions job log (SparkFunSuite.retry's log4j output only lands in
+  // target/unit-tests.log, surfaced as an artifact rather than in the live log).
+  private def retryWithVisibleLog(maxAttempts: Int)(body: => Unit): Unit = {
+    var attempt = 1
+    var done = false
+    while (!done) {
+      try {
+        body
+        done = true
+      } catch {
+        case NonFatal(t) if attempt >= maxAttempts => throw t
+        case NonFatal(t) =>
+          // scalastyle:off println
+          println(
+            s"===== Attempt $attempt/$maxAttempts failed " +
+              s"(${t.getClass.getSimpleName}: ${t.getMessage}); retrying =====")
+          // scalastyle:on println
+          // A leaked worker from this attempt may still hold sockets/listeners; do not
+          // let afterEach/beforeEach throwing on that residual state abort the retry loop.
+          runQuietly("afterEach", afterEach())
+          runQuietly("beforeEach", beforeEach())
+          attempt += 1
+      }
+    }
+  }
+
+  private def awaitTestBodyInNewThread(timeoutMillis: Long, onTimeout: () => Unit)(
+      body: => Unit): Unit = {
+    @volatile var error: Throwable = null
+    val runnable: Runnable = () => {
+      try {
+        body
+      } catch {
+        case t: Throwable => error = t
+      }
+    }
+    val worker = new Thread(runnable, s"${getClass.getSimpleName}-testBody-worker")
+    worker.setDaemon(true)
+    worker.start()
+    worker.join(timeoutMillis)
+    if (worker.isAlive) {
+      // Capture the worker's stack so post-mortem diagnostics can identify which leaked
+      // thread belongs to which attempt without a separate jstack.
+      // scalastyle:off println
+      println(
+        s"===== Test body did not complete within $timeoutMillis ms " +
+          s"(thread=${worker.getName}, state=${worker.getState}); stack trace follows =====")
+      worker.getStackTrace.foreach(frame => println(s"  at $frame"))
+      // scalastyle:on println
+      // Best-effort: release any resource the worker is blocked on so it can unwind its own
+      // finally and stop holding global state (SparkConnectService, listeners, ...).
+      onTimeout()
+      // Also interrupt the worker so any interruptible blocking call (e.g. the Thread.join
+      // inside StreamExecution.interruptAndAwaitExecutionThreadTermination) wakes up.
+      worker.interrupt()
+      // Grace period for the now-unblocked worker to run its own finally
+      // (SparkConnectService.stop() then the ~4s settle sleep).
+      val gracePeriodMs = 30.seconds.toMillis
+      worker.join(gracePeriodMs)
+      val te = new TimeoutException(
+        s"Test body did not complete within $timeoutMillis ms " +
+          s"(after a $gracePeriodMs ms post-cleanup grace period)")
+      // If the body finished during the grace window, surface the original failure
+      // as the cause so a slow assertion failure is not misreported as a pure hang.
+      if (!worker.isAlive && error != null) te.initCause(error)
+      throw te
+    }
+    if (error != null) throw error
+  }
+
+  private def runPythonForeachBatchTerminationTestBody(sessionHolder: SessionHolder): Unit = {
+    // Unique query names per attempt: a leaked query from a timed-out attempt may still
+    // occupy the old name in spark.streams.active.
+    val suffix = s"_${System.nanoTime()}"
+    val q1Name = s"foreachBatch_termination_test_q1$suffix"
+    val q2Name = s"foreachBatch_termination_test_q2$suffix"
+
+    // Snapshot listeners before this attempt registers anything so we can scope cleanup and
+    // assertions to listeners we added -- even if a previous timed-out attempt leaked a worker
+    // whose own finally is racing with us.
+    val baselineListeners = spark.streams.listListeners().toSet
+    var capturedServer: AnyRef = null
+    var ourNewListeners = Set.empty[StreamingQueryListener]
 
-    val sessionHolder = SparkConnectTestUtils.createDummySessionHolder(spark)
     try {
+      // A previous timed-out attempt's leaked worker may still hold `started=true`, which
+      // would make `start()` below a no-op and cause this attempt to share (and later
+      // re-stop) the stale server. Force-stop first so `start()` creates a fresh instance;
+      // the identity check in `finally` then distinguishes attempts.
+      if (SparkConnectService.started) {
+        runQuietly("stale SparkConnectService.stop()", SparkConnectService.stop())
+      }
       SparkConnectService.start(spark.sparkContext)
+      // Identity-check the server in `finally`: a previous attempt's leaked finally must
+      // not tear down a service belonging to a later attempt.
+      capturedServer = SparkConnectService.server
 
       val pythonFn = dummyPythonFunction(sessionHolder)(streamingForeachBatchFunction)
       val (fn1, cleaner1) =
@@ -249,7 +355,7 @@ class SparkConnectSessionHolderSuite extends SharedSparkSession {
         .load()
         .writeStream
         .format("memory")
-        .queryName("foreachBatch_termination_test_q1")
+        .queryName(q1Name)
         .foreachBatch(fn1)
         .start()
 
@@ -258,7 +364,7 @@ class SparkConnectSessionHolderSuite extends SharedSparkSession {
         .load()
         .writeStream
         .format("memory")
-        .queryName("foreachBatch_termination_test_q2")
+        .queryName(q2Name)
         .foreachBatch(fn2)
         .start()
 
@@ -267,6 +373,10 @@ class SparkConnectSessionHolderSuite extends SharedSparkSession {
       sessionHolder.streamingForeachBatchRunnerCleanerCache
         .registerCleanerForQuery(query2, cleaner2)
 
+      // The first registerCleanerForQuery lazily registers the cleaner listener. Capture the
+      // listeners we added so finally only removes ours, not a concurrent attempt's.
+      ourNewListeners = spark.streams.listListeners().toSet -- baselineListeners
+
       val (runner1, runner2) =
         (cleaner1.asInstanceOf[RunnerCleaner].runner, cleaner2.asInstanceOf[RunnerCleaner].runner)
 
@@ -288,14 +398,58 @@ class SparkConnectSessionHolderSuite extends SharedSparkSession {
         assert(runner2.isWorkerStopped().get)
       }
 
-      assert(spark.streams.active.isEmpty) // no running query
-      assert(spark.streams.listListeners().length == 1) // only process termination listener
+      // Only assert this attempt's queries stopped; a previous timed-out attempt may have
+      // leaked queries into spark.streams.active that we cannot synchronously clean up.
+      assert(!spark.streams.active.exists(q => q.name == q1Name || q.name == q2Name))
+      // Scoped to this attempt: exactly one new listener (the cleaner listener) should
+      // have been registered, regardless of any listeners leaked by a prior attempt.
+      assert(
+        ourNewListeners.size == 1,
+        s"expected exactly 1 new listener registered by this attempt, " +
+          s"got ${ourNewListeners.size}")
     } finally {
-      SparkConnectService.stop()
-      // Wait for things to calm down.
-      Thread.sleep(4.seconds.toMillis)
-      // remove process termination listener
-      spark.streams.listListeners().foreach(spark.streams.removeListener)
+      // Only stop the service if it is still the one this attempt started; otherwise a
+      // previous attempt's leaked finally would tear down the live service of the current
+      // attempt.
+      if (capturedServer != null && (SparkConnectService.server eq capturedServer)) {
+        // Cleanup is best-effort: any failure must not mask the primary failure in the
+        // try block, and the listener cleanup below must still run.
+        runQuietly("SparkConnectService.stop()", SparkConnectService.stop())
+        runQuietly("settle sleep", Thread.sleep(4.seconds.toMillis))
+      }
+      // Remove only the listeners this attempt registered; never touch a concurrent
+      // attempt's process-termination listener. Wrapped in `runQuietly` so a throw here
+      // cannot mask a primary failure in the try block.
+      runQuietly("removeListeners", ourNewListeners.foreach(spark.streams.removeListener))
+    }
+  }
+
+  test("python foreachBatch process: process terminates after query is stopped") {
+    // scalastyle:off assume
+    assume(IntegratedUDFTestUtils.shouldTestPandasUDFs)
+    assume(PythonTestDepsChecker.isConnectDepsAvailable)
+    // scalastyle:on assume
+
+    // Bound query.stop() so it cannot hang indefinitely: spark.sql.streaming.stopTimeout
+    // defaults to 0 (wait forever), which turns a stuck batch into an unkillable test.
+    // 30s is small enough to fit under the outer per-attempt cap with room to spare.
+    withSQLConf(SQLConf.STREAMING_STOP_TIMEOUT.key -> "30000") {
+      retryWithVisibleLog(maxAttempts = 3) {
+        // Run the body on a fresh daemon thread so the test thread can recover from a
+        // hang in a non-interruptible socket read. SessionHolder is created outside the
+        // body so onTimeout can close its Python worker sockets via cleanerCache; that
+        // unblocks the hung dataIn.readInt so the leaked thread's finally can settle
+        // before the next retry. 2-minute cap strictly bounds the original 150-minute hang.
+        val sessionHolder = SparkConnectTestUtils.createDummySessionHolder(spark)
+        awaitTestBodyInNewThread(
+          timeoutMillis = TimeUnit.MINUTES.toMillis(2),
+          onTimeout = () =>
+            runQuietly(
+              "onTimeout cleanUpAll",
+              sessionHolder.streamingForeachBatchRunnerCleanerCache.cleanUpAll())) {
+          runPythonForeachBatchTerminationTestBody(sessionHolder)
+        }
+      }
     }
   }
 
diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala
index 6a133f87c1b11..1a335c879a8cb 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala
@@ -18,10 +18,11 @@
 package org.apache.spark.sql.connect.service
 
 import java.util.UUID
+import java.util.concurrent.CountDownLatch
 
 import scala.concurrent.duration.DurationInt
 
-import org.mockito.Mockito.when
+import org.mockito.Mockito.{atLeastOnce, doThrow, verify, when}
 import org.scalatest.concurrent.Eventually.eventually
 import org.scalatest.concurrent.Futures.timeout
 import org.scalatestplus.mockito.MockitoSugar
@@ -156,4 +157,135 @@ class SparkConnectStreamingQueryCacheSuite extends SparkFunSuite with MockitoSug
     }
     sessionMgr.shutdown()
   }
+
+  test("Query registered when the session is already closing is stopped and dropped") {
+    // Tests the closing-session guard in registerNewStreamingQuery in isolation (the session is
+    // already marked closing before the query is registered). The concurrent race is covered by the
+    // next test.
+
+    val queryId = UUID.randomUUID().toString
+    val runId = UUID.randomUUID().toString
+    val tag = "test_tag"
+    val mockSession = mock[SparkSession]
+    val mockQuery = mock[StreamingQuery]
+    val mockStreamingQueryManager = mock[StreamingQueryManager]
+
+    val sessionHolder =
+      SessionHolder(userId = "test_user_1", sessionId = "test_session_1", session = mockSession)
+
+    val sessionMgr = createSessionManager()
+
+    when(mockQuery.id).thenReturn(UUID.fromString(queryId))
+    when(mockQuery.runId).thenReturn(UUID.fromString(runId))
+    when(mockQuery.isActive).thenReturn(true)
+    when(mockSession.streams).thenReturn(mockStreamingQueryManager)
+    when(mockStreamingQueryManager.get(queryId)).thenReturn(mockQuery)
+
+    // Mark the session as closing. close() sets closedTimeMs and, for a not-yet-started session in
+    // tests, returns early without running the rest of the cleanup.
+    sessionHolder.close()
+    assert(sessionHolder.isClosing)
+
+    sessionMgr.registerNewStreamingQuery(sessionHolder, mockQuery, Set(tag), "")
+
+    // The query must be stopped and, once stopped, dropped from the cache (directly and by tag).
+    // The cleanup removes the entry by query identity (computeIfPresent matching current.query eq
+    // query), not by case-class value equality, so removal still succeeds even if the maintenance
+    // thread concurrently rewrites the entry's expiresAtMs after seeing the just-stopped query.
+    // That specific maintenance-vs-cleanup interleaving is not exercised as a separate test
+    // because it is not deterministically reproducible; the identity match makes it correct.
+    eventually(timeout(1.minute)) {
+      verify(mockQuery).stop()
+      assert(sessionMgr.getCachedValue(queryId, runId).isEmpty)
+      assert(!sessionMgr.taggedQueries.containsKey(tag))
+    }
+    sessionMgr.shutdown()
+  }
+
+  test("Query registered for a closing session is retained when stopping it fails") {
+    // If stopping the query fails, the cache entry must NOT be dropped: removing it would discard
+    // the only server-side handle to a possibly still-running query, re-creating the leak. The
+    // entry is kept so a later cleanup / maintenance pass can reap it.
+    val queryId = UUID.randomUUID().toString
+    val runId = UUID.randomUUID().toString
+    val mockSession = mock[SparkSession]
+    val mockQuery = mock[StreamingQuery]
+    val mockStreamingQueryManager = mock[StreamingQueryManager]
+
+    val sessionHolder =
+      SessionHolder(userId = "test_user_1", sessionId = "test_session_1", session = mockSession)
+
+    val sessionMgr = createSessionManager()
+
+    when(mockQuery.id).thenReturn(UUID.fromString(queryId))
+    when(mockQuery.runId).thenReturn(UUID.fromString(runId))
+    when(mockQuery.isActive).thenReturn(true)
+    when(mockSession.streams).thenReturn(mockStreamingQueryManager)
+    when(mockStreamingQueryManager.get(queryId)).thenReturn(mockQuery)
+    doThrow(new RuntimeException("stop failed")).when(mockQuery).stop()
+
+    sessionHolder.close()
+    sessionMgr.registerNewStreamingQuery(sessionHolder, mockQuery, Set("test_tag"), "")
+
+    // The stop is attempted...
+    eventually(timeout(1.minute)) {
+      verify(mockQuery).stop()
+    }
+    // ...but because it failed, the entry is retained rather than dropped.
+    assert(sessionMgr.getCachedValue(queryId, runId).nonEmpty)
+    sessionMgr.shutdown()
+  }
+
+  test("Query registration racing with session shutdown leaves no query running") {
+    // Exercises the actual race: registerNewStreamingQuery runs concurrently with the session
+    // shutdown sequence (close() sets closedTimeMs, then cleanupRunningQueries() stops the
+    // session's queries by iterating the cache). Whatever the interleaving, the query must end up
+    // stopped and never stranded (left running while holding a reference to the closed session).
+    val sessionMgr = createSessionManager()
+    val numIterations = 200
+    try {
+      (1 to numIterations).foreach { i =>
+        val queryId = UUID.randomUUID().toString
+        val runId = UUID.randomUUID().toString
+        val mockSession = mock[SparkSession]
+        val mockQuery = mock[StreamingQuery]
+        val mockStreamingQueryManager = mock[StreamingQueryManager]
+        when(mockQuery.id).thenReturn(UUID.fromString(queryId))
+        when(mockQuery.runId).thenReturn(UUID.fromString(runId))
+        when(mockQuery.isActive).thenReturn(true)
+        when(mockSession.streams).thenReturn(mockStreamingQueryManager)
+        when(mockStreamingQueryManager.get(queryId)).thenReturn(mockQuery)
+
+        val sessionHolder = SessionHolder(
+          userId = "test_user",
+          sessionId = s"test_session_$i",
+          session = mockSession)
+
+        // Release both threads together to maximize the chance of interleaving.
+        val startLatch = new CountDownLatch(1)
+        val closeThread = new Thread(() => {
+          startLatch.await()
+          sessionHolder.close() // Sets closedTimeMs, i.e. isClosing.
+          sessionMgr.cleanupRunningQueries(sessionHolder) // Mirrors close()'s query cleanup.
+        })
+        val registerThread = new Thread(() => {
+          startLatch.await()
+          sessionMgr.registerNewStreamingQuery(sessionHolder, mockQuery, Set.empty[String], "")
+        })
+        closeThread.start()
+        registerThread.start()
+        startLatch.countDown()
+        closeThread.join()
+        registerThread.join()
+
+        // Whatever the interleaving, the query must have been stopped by one of the two paths
+        // (registration's closing-session branch stops asynchronously, hence eventually()).
+        eventually(timeout(10.seconds)) {
+          verify(mockQuery, atLeastOnce()).stop()
+        }
+      }
+    } finally {
+      sessionMgr.shutdown()
+    }
+  }
 }
diff --git a/sql/connect/shims/pom.xml b/sql/connect/shims/pom.xml
index 4b716c4cc2189..698b8129940a8 100644
--- a/sql/connect/shims/pom.xml
+++ b/sql/connect/shims/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt b/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt
index 1ffe8b80edce0..d3ae32270d4b8 100644
--- a/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt
@@ -2,460 +2,460 @@
 datetime +/- interval
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 datetime +/- interval:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date + interval(m)                                  983           1003          28         10.2          98.3       1.0X
-date + interval(m, d)                               949            954           8         10.5          94.9       1.0X
-date + interval(m, d, ms)                          3751           3807          79          2.7         375.1       0.3X
-date - interval(m)                                  847            852           4         11.8          84.7       1.2X
-date - interval(m, d)                               867            872           7         11.5          86.7       1.1X
-date - interval(m, d, ms)                          3765           3767           3          2.7         376.5       0.3X
-timestamp + interval(m)                            1537           1543           9          6.5         153.7       0.6X
-timestamp + interval(m, d)                         1575           1578           5          6.4         157.5       0.6X
-timestamp + interval(m, d, ms)                     1716           1717           2          5.8         171.6       0.6X
-timestamp - interval(m)                            1511           1517           7          6.6         151.1       0.7X
-timestamp - interval(m, d)                         1573           1574           1          6.4         157.3       0.6X
-timestamp - interval(m, d, ms)                     1708           1715           9          5.9         170.8       0.6X
+date + interval(m)                                  826            872          75         12.1          82.6       1.0X
+date + interval(m, d)                               822            856          33         12.2          82.2       1.0X
+date + interval(m, d, ms)                          3529           3537          11          2.8         352.9       0.2X
+date - interval(m)                                  791            799           8         12.6          79.1       1.0X
+date - interval(m, d)                               823            837          12         12.2          82.3       1.0X
+date - interval(m, d, ms)                          3537           3555          25          2.8         353.7       0.2X
+timestamp + interval(m)                            1842           1857          22          5.4         184.2       0.4X
+timestamp + interval(m, d)                         1886           1889           5          5.3         188.6       0.4X
+timestamp + interval(m, d, ms)                     1934           1942          11          5.2         193.4       0.4X
+timestamp - interval(m)                            1708           1715          11          5.9         170.8       0.5X
+timestamp - interval(m, d)                         1747           1748           2          5.7         174.7       0.5X
+timestamp - interval(m, d, ms)                     1896           1905          13          5.3         189.6       0.4X
 
 
 ================================================================================================
 Extract components
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 cast to timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast to timestamp wholestage off                    213            214           2         47.0          21.3       1.0X
-cast to timestamp wholestage on                     221            226           4         45.2          22.1       1.0X
+cast to timestamp wholestage off                    213            220          10         47.0          21.3       1.0X
+cast to timestamp wholestage on                     204            207           5         49.1          20.4       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 year of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-year of timestamp wholestage off                    623            630          11         16.1          62.3       1.0X
-year of timestamp wholestage on                     640            645           6         15.6          64.0       1.0X
+year of timestamp wholestage off                    634            640           9         15.8          63.4       1.0X
+year of timestamp wholestage on                     636            646           9         15.7          63.6       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 quarter of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-quarter of timestamp wholestage off                 658            663           8         15.2          65.8       1.0X
-quarter of timestamp wholestage on                  666            670           4         15.0          66.6       1.0X
+quarter of timestamp wholestage off                 672            675           4         14.9          67.2       1.0X
+quarter of timestamp wholestage on                  684            692           6         14.6          68.4       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 month of timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-month of timestamp wholestage off                   639            639           0         15.6          63.9       1.0X
-month of timestamp wholestage on                    648            652           5         15.4          64.8       1.0X
+month of timestamp wholestage off                   671            680          13         14.9          67.1       1.0X
+month of timestamp wholestage on                    672            680          10         14.9          67.2       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 weekofyear of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-weekofyear of timestamp wholestage off             1039           1041           3          9.6         103.9       1.0X
-weekofyear of timestamp wholestage on              1041           1046           5          9.6         104.1       1.0X
+weekofyear of timestamp wholestage off              960            962           4         10.4          96.0       1.0X
+weekofyear of timestamp wholestage on               970            988          13         10.3          97.0       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 day of timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-day of timestamp wholestage off                     647            650           4         15.5          64.7       1.0X
-day of timestamp wholestage on                      652            656           3         15.3          65.2       1.0X
+day of timestamp wholestage off                     656            657           1         15.3          65.6       1.0X
+day of timestamp wholestage on                      663            670           5         15.1          66.3       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 dayofyear of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofyear of timestamp wholestage off               675            678           5         14.8          67.5       1.0X
-dayofyear of timestamp wholestage on                688            690           1         14.5          68.8       1.0X
+dayofyear of timestamp wholestage off               677            684          10         14.8          67.7       1.0X
+dayofyear of timestamp wholestage on                700            704           4         14.3          70.0       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 dayofmonth of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofmonth of timestamp wholestage off              660            660           0         15.1          66.0       1.0X
-dayofmonth of timestamp wholestage on               655            660           4         15.3          65.5       1.0X
+dayofmonth of timestamp wholestage off              666            671           7         15.0          66.6       1.0X
+dayofmonth of timestamp wholestage on               658            666           6         15.2          65.8       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 dayofweek of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofweek of timestamp wholestage off               803            807           6         12.5          80.3       1.0X
-dayofweek of timestamp wholestage on                818            827          10         12.2          81.8       1.0X
+dayofweek of timestamp wholestage off               777            796          27         12.9          77.7       1.0X
+dayofweek of timestamp wholestage on                788            796          10         12.7          78.8       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 weekday of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-weekday of timestamp wholestage off                 746            746           0         13.4          74.6       1.0X
-weekday of timestamp wholestage on                  756            761           4         13.2          75.6       1.0X
+weekday of timestamp wholestage off                 747            748           0         13.4          74.7       1.0X
+weekday of timestamp wholestage on                  762            770           9         13.1          76.2       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 hour of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-hour of timestamp wholestage off                    546            547           1         18.3          54.6       1.0X
-hour of timestamp wholestage on                     558            559           1         17.9          55.8       1.0X
+hour of timestamp wholestage off                    589            590           1         17.0          58.9       1.0X
+hour of timestamp wholestage on                     598            604           7         16.7          59.8       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 minute of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-minute of timestamp wholestage off                  543            544           1         18.4          54.3       1.0X
-minute of timestamp wholestage on                   553            556           3         18.1          55.3       1.0X
+minute of timestamp wholestage off                  617            618           1         16.2          61.7       1.0X
+minute of timestamp wholestage on                   596            603           8         16.8          59.6       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 second of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-second of timestamp wholestage off                  544            562          26         18.4          54.4       1.0X
-second of timestamp wholestage on                   557            561           5         17.9          55.7       1.0X
+second of timestamp wholestage off                  595            612          24         16.8          59.5       1.0X
+second of timestamp wholestage on                   599            610           9         16.7          59.9       1.0X
 
 
 ================================================================================================
 Current date and time
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 current_date:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-current_date wholestage off                         184            187           4         54.2          18.4       1.0X
-current_date wholestage on                          218            222           4         46.0          21.8       0.8X
+current_date wholestage off                         181            187           8         55.2          18.1       1.0X
+current_date wholestage on                          201            211          14         49.7          20.1       0.9X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 current_timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-current_timestamp wholestage off                    197            199           3         50.9          19.7       1.0X
-current_timestamp wholestage on                     231            239           8         43.3          23.1       0.9X
+current_timestamp wholestage off                    193            198           6         51.7          19.3       1.0X
+current_timestamp wholestage on                     212            223          17         47.1          21.2       0.9X
 
 
 ================================================================================================
 Date arithmetic
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 cast to date:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast to date wholestage off                         600            601           1         16.7          60.0       1.0X
-cast to date wholestage on                          604            607           3         16.5          60.4       1.0X
+cast to date wholestage off                         611            628          23         16.4          61.1       1.0X
+cast to date wholestage on                          609            617           5         16.4          60.9       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 last_day:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-last_day wholestage off                             671            672           2         14.9          67.1       1.0X
-last_day wholestage on                              696            700           3         14.4          69.6       1.0X
+last_day wholestage off                             674            678           7         14.8          67.4       1.0X
+last_day wholestage on                              683            692           9         14.6          68.3       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 next_day:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-next_day wholestage off                             629            631           3         15.9          62.9       1.0X
-next_day wholestage on                              635            640           3         15.7          63.5       1.0X
+next_day wholestage off                             650            653           4         15.4          65.0       1.0X
+next_day wholestage on                              638            641           5         15.7          63.8       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_add:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_add wholestage off                             578            579           2         17.3          57.8       1.0X
-date_add wholestage on                              605            609           5         16.5          60.5       1.0X
+date_add wholestage off                             596            602           9         16.8          59.6       1.0X
+date_add wholestage on                              595            602           9         16.8          59.5       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_sub:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_sub wholestage off                             579            581           2         17.3          57.9       1.0X
-date_sub wholestage on                              605            608           3         16.5          60.5       1.0X
+date_sub wholestage off                             599            607          11         16.7          59.9       1.0X
+date_sub wholestage on                              602            607           8         16.6          60.2       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 add_months:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-add_months wholestage off                           802            804           3         12.5          80.2       1.0X
-add_months wholestage on                            830            834           4         12.1          83.0       1.0X
+add_months wholestage off                           817            818           1         12.2          81.7       1.0X
+add_months wholestage on                            811            818           8         12.3          81.1       1.0X
 
 
 ================================================================================================
 Formatting dates
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 format date:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-format date wholestage off                         3329           3330           1          3.0         332.9       1.0X
-format date wholestage on                          3471           3493          23          2.9         347.1       1.0X
+format date wholestage off                         2731           2744          19          3.7         273.1       1.0X
+format date wholestage on                          2750           2785          45          3.6         275.0       1.0X
 
 
 ================================================================================================
 Formatting timestamps
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 from_unixtime:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-from_unixtime wholestage off                       2655           2656           2          3.8         265.5       1.0X
-from_unixtime wholestage on                        2677           2687           8          3.7         267.7       1.0X
+from_unixtime wholestage off                       2936           2939           4          3.4         293.6       1.0X
+from_unixtime wholestage on                        2766           2779          15          3.6         276.6       1.1X
 
 
 ================================================================================================
 Convert timestamps
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 from_utc_timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-from_utc_timestamp wholestage off                   713            714           1         14.0          71.3       1.0X
-from_utc_timestamp wholestage on                    778            779           1         12.9          77.8       0.9X
+from_utc_timestamp wholestage off                   729            731           4         13.7          72.9       1.0X
+from_utc_timestamp wholestage on                    790            799           7         12.7          79.0       0.9X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 to_utc_timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_utc_timestamp wholestage off                     747            750           4         13.4          74.7       1.0X
-to_utc_timestamp wholestage on                      850            854           4         11.8          85.0       0.9X
+to_utc_timestamp wholestage off                     860            864           6         11.6          86.0       1.0X
+to_utc_timestamp wholestage on                      870            878           7         11.5          87.0       1.0X
 
 
 ================================================================================================
 Intervals
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 cast interval:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast interval wholestage off                        245            251           9         40.8          24.5       1.0X
-cast interval wholestage on                         220            226           4         45.4          22.0       1.1X
+cast interval wholestage off                        219            239          28         45.6          21.9       1.0X
+cast interval wholestage on                         212            220           6         47.2          21.2       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 datediff:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-datediff wholestage off                             985            994          13         10.2          98.5       1.0X
-datediff wholestage on                              995            997           2         10.0          99.5       1.0X
+datediff wholestage off                            1033           1035           3          9.7         103.3       1.0X
+datediff wholestage on                             1047           1053           6          9.6         104.7       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 months_between:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-months_between wholestage off                      2736           2738           3          3.7         273.6       1.0X
-months_between wholestage on                       2771           2776           5          3.6         277.1       1.0X
+months_between wholestage off                      3185           3191           8          3.1         318.5       1.0X
+months_between wholestage on                       3224           3231           9          3.1         322.4       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 window:                                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-window wholestage off                               406            407           2          2.5         405.9       1.0X
-window wholestage on                                651            684          25          1.5         650.8       0.6X
+window wholestage off                               529            538          12          1.9         529.2       1.0X
+window wholestage on                                659            668           9          1.5         658.8       0.8X
 
 
 ================================================================================================
 Truncation
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc YEAR:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YEAR wholestage off                     1433           1434           0          7.0         143.3       1.0X
-date_trunc YEAR wholestage on                      1392           1395           2          7.2         139.2       1.0X
+date_trunc YEAR wholestage off                     1667           1671           6          6.0         166.7       1.0X
+date_trunc YEAR wholestage on                      1664           1670           6          6.0         166.4       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc YYYY:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YYYY wholestage off                     1432           1434           2          7.0         143.2       1.0X
-date_trunc YYYY wholestage on                      1395           1399           4          7.2         139.5       1.0X
+date_trunc YYYY wholestage off                     1663           1665           3          6.0         166.3       1.0X
+date_trunc YYYY wholestage on                      1655           1666          10          6.0         165.5       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc YY:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YY wholestage off                       1432           1442          14          7.0         143.2       1.0X
-date_trunc YY wholestage on                        1393           1394           2          7.2         139.3       1.0X
+date_trunc YY wholestage off                       1666           1670           6          6.0         166.6       1.0X
+date_trunc YY wholestage on                        1660           1674          14          6.0         166.0       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc MON:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MON wholestage off                      1420           1423           5          7.0         142.0       1.0X
-date_trunc MON wholestage on                       1425           1429           3          7.0         142.5       1.0X
+date_trunc MON wholestage off                      1697           1700           5          5.9         169.7       1.0X
+date_trunc MON wholestage on                       1678           1696          24          6.0         167.8       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc MONTH:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MONTH wholestage off                    1423           1424           0          7.0         142.3       1.0X
-date_trunc MONTH wholestage on                     1422           1426           2          7.0         142.2       1.0X
+date_trunc MONTH wholestage off                    1709           1716          10          5.9         170.9       1.0X
+date_trunc MONTH wholestage on                     1671           1678           7          6.0         167.1       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc MM:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MM wholestage off                       1424           1424           1          7.0         142.4       1.0X
-date_trunc MM wholestage on                        1423           1431           8          7.0         142.3       1.0X
+date_trunc MM wholestage off                       1710           1717          11          5.8         171.0       1.0X
+date_trunc MM wholestage on                        1670           1679           5          6.0         167.0       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc DAY:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc DAY wholestage off                      1251           1257           9          8.0         125.1       1.0X
-date_trunc DAY wholestage on                       1277           1283           7          7.8         127.7       1.0X
+date_trunc DAY wholestage off                       636            638           3         15.7          63.6       1.0X
+date_trunc DAY wholestage on                        585            589           5         17.1          58.5       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc DD:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc DD wholestage off                       1250           1250           0          8.0         125.0       1.0X
-date_trunc DD wholestage on                        1278           1278           1          7.8         127.8       1.0X
+date_trunc DD wholestage off                        635            648          18         15.7          63.5       1.0X
+date_trunc DD wholestage on                         581            587           5         17.2          58.1       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc HOUR:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc HOUR wholestage off                     1212           1213           1          8.3         121.2       1.0X
-date_trunc HOUR wholestage on                      1233           1237           3          8.1         123.3       1.0X
+date_trunc HOUR wholestage off                      614            617           5         16.3          61.4       1.0X
+date_trunc HOUR wholestage on                       575            579           2         17.4          57.5       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc MINUTE:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MINUTE wholestage off                   1243           1244           1          8.0         124.3       1.0X
-date_trunc MINUTE wholestage on                    1216           1222           7          8.2         121.6       1.0X
+date_trunc MINUTE wholestage off                    617            619           2         16.2          61.7       1.0X
+date_trunc MINUTE wholestage on                     579            589          16         17.3          57.9       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc SECOND:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc SECOND wholestage off                    304            309           7         32.9          30.4       1.0X
-date_trunc SECOND wholestage on                     261            265           4         38.3          26.1       1.2X
+date_trunc SECOND wholestage off                    302            302           1         33.2          30.2       1.0X
+date_trunc SECOND wholestage on                     273            279           7         36.7          27.3       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc WEEK:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc WEEK wholestage off                     1351           1352           0          7.4         135.1       1.0X
-date_trunc WEEK wholestage on                      1320           1326           4          7.6         132.0       1.0X
+date_trunc WEEK wholestage off                     1623           1631          12          6.2         162.3       1.0X
+date_trunc WEEK wholestage on                      1580           1597          17          6.3         158.0       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 date_trunc QUARTER:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc QUARTER wholestage off                  1898           1900           3          5.3         189.8       1.0X
-date_trunc QUARTER wholestage on                   1824           1828           3          5.5         182.4       1.0X
+date_trunc QUARTER wholestage off                  2020           2025           6          4.9         202.0       1.0X
+date_trunc QUARTER wholestage on                   1958           1970           8          5.1         195.8       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 trunc year:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc year wholestage off                           765            765           0         13.1          76.5       1.0X
-trunc year wholestage on                            728            731           2         13.7          72.8       1.1X
+trunc year wholestage off                           792            802          14         12.6          79.2       1.0X
+trunc year wholestage on                            744            750          11         13.4          74.4       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 trunc yyyy:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc yyyy wholestage off                           759            760           2         13.2          75.9       1.0X
-trunc yyyy wholestage on                            728            730           2         13.7          72.8       1.0X
+trunc yyyy wholestage off                           798            801           4         12.5          79.8       1.0X
+trunc yyyy wholestage on                            743            761          18         13.5          74.3       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 trunc yy:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc yy wholestage off                             765            767           2         13.1          76.5       1.0X
-trunc yy wholestage on                              728            730           2         13.7          72.8       1.1X
+trunc yy wholestage off                             792            802          14         12.6          79.2       1.0X
+trunc yy wholestage on                              744            755          14         13.4          74.4       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 trunc mon:                                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc mon wholestage off                            737            738           1         13.6          73.7       1.0X
-trunc mon wholestage on                             699            700           1         14.3          69.9       1.1X
+trunc mon wholestage off                            768            771           3         13.0          76.8       1.0X
+trunc mon wholestage on                             721            731          10         13.9          72.1       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 trunc month:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc month wholestage off                          732            740          11         13.7          73.2       1.0X
-trunc month wholestage on                           698            710          13         14.3          69.8       1.0X
+trunc month wholestage off                          774            781           9         12.9          77.4       1.0X
+trunc month wholestage on                           723            728           6         13.8          72.3       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 trunc mm:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc mm wholestage off                             734            736           4         13.6          73.4       1.0X
-trunc mm wholestage on                              698            700           3         14.3          69.8       1.1X
+trunc mm wholestage off                             766            770           5         13.0          76.6       1.0X
+trunc mm wholestage on                              724            732           9         13.8          72.4       1.1X
 
 
 ================================================================================================
 Parsing
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 to timestamp str:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to timestamp str wholestage off                      97             98           2         10.3          96.9       1.0X
-to timestamp str wholestage on                       98            101           3         10.2          98.0       1.0X
+to timestamp str wholestage off                     107            107           0          9.3         107.0       1.0X
+to timestamp str wholestage on                      123            127           4          8.1         122.9       0.9X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 to_timestamp:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_timestamp wholestage off                         706            707           1          1.4         706.4       1.0X
-to_timestamp wholestage on                          686            690           5          1.5         685.8       1.0X
+to_timestamp wholestage off                         643            650           9          1.6         643.1       1.0X
+to_timestamp wholestage on                          659            669          11          1.5         659.2       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 to_unix_timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_unix_timestamp wholestage off                    687            688           1          1.5         687.1       1.0X
-to_unix_timestamp wholestage on                     679            681           2          1.5         679.3       1.0X
+to_unix_timestamp wholestage off                    660            660           1          1.5         659.5       1.0X
+to_unix_timestamp wholestage on                     660            666           5          1.5         659.6       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 to date str:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to date str wholestage off                          131            132           1          7.6         131.3       1.0X
-to date str wholestage on                           126            129           2          7.9         126.5       1.0X
+to date str wholestage off                          163            163           1          6.1         162.9       1.0X
+to date str wholestage on                           169            172           5          5.9         168.7       1.0X
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 to_date:                                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_date wholestage off                              654            658           5          1.5         654.1       1.0X
-to_date wholestage on                               641            642           1          1.6         640.9       1.0X
+to_date wholestage off                              654            658           5          1.5         654.5       1.0X
+to_date wholestage on                               656            662           4          1.5         656.5       1.0X
 
 
 ================================================================================================
 Conversion from/to external types
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 To/from Java's date-time:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-From java.sql.Date                                  267            270           4         18.7          53.4       1.0X
-From java.time.LocalDate                            217            219           2         23.0          43.4       1.2X
-Collect java.sql.Date                              1294           1345          74          3.9         258.7       0.2X
-Collect java.time.LocalDate                         997           1039          60          5.0         199.5       0.3X
-From java.sql.Timestamp                             229            237           7         21.9          45.8       1.2X
-From java.time.Instant                              190            208          17         26.3          38.1       1.4X
-Collect longs                                      1009           1090          98          5.0         201.9       0.3X
-Collect java.sql.Timestamp                         1066           1137          63          4.7         213.2       0.3X
-Collect java.time.Instant                          1012           1172         140          4.9         202.4       0.3X
-java.sql.Date to Hive string                       3822           3929          93          1.3         764.4       0.1X
-java.time.LocalDate to Hive string                 3120           3139          17          1.6         624.0       0.1X
-java.sql.Timestamp to Hive string                  6506           6623         166          0.8        1301.2       0.0X
-java.time.Instant to Hive string                   4192           4217          25          1.2         838.4       0.1X
+From java.sql.Date                                  317            319           4         15.8          63.3       1.0X
+From java.time.LocalDate                            235            235           1         21.3          46.9       1.3X
+Collect java.sql.Date                              1550           1622          77          3.2         310.1       0.2X
+Collect java.time.LocalDate                        1021           1145         108          4.9         204.2       0.3X
+From java.sql.Timestamp                             241            257          15         20.8          48.1       1.3X
+From java.time.Instant                              206            215          13         24.2          41.3       1.5X
+Collect longs                                      1065           1186         205          4.7         213.0       0.3X
+Collect java.sql.Timestamp                         1330           1414          73          3.8         266.1       0.2X
+Collect java.time.Instant                          1180           1303         107          4.2         236.0       0.3X
+java.sql.Date to Hive string                       5630           5668          38          0.9        1125.9       0.1X
+java.time.LocalDate to Hive string                 4147           4197          67          1.2         829.3       0.1X
+java.sql.Timestamp to Hive string                  7441           7609         160          0.7        1488.1       0.0X
+java.time.Instant to Hive string                   5179           5237          94          1.0        1035.8       0.1X
 
 
diff --git a/sql/core/benchmarks/DateTimeBenchmark-jdk25-results.txt b/sql/core/benchmarks/DateTimeBenchmark-jdk25-results.txt
index 18ac977366fb0..b83a112c9c949 100644
--- a/sql/core/benchmarks/DateTimeBenchmark-jdk25-results.txt
+++ b/sql/core/benchmarks/DateTimeBenchmark-jdk25-results.txt
@@ -2,460 +2,460 @@
 datetime +/- interval
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 datetime +/- interval:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date + interval(m)                                  827            865          58         12.1          82.7       1.0X
-date + interval(m, d)                               808            823          21         12.4          80.8       1.0X
-date + interval(m, d, ms)                          3344           3345           1          3.0         334.4       0.2X
-date - interval(m)                                  793            795           2         12.6          79.3       1.0X
-date - interval(m, d)                               806            815           8         12.4          80.6       1.0X
-date - interval(m, d, ms)                          3384           3385           2          3.0         338.4       0.2X
-timestamp + interval(m)                            1652           1657           7          6.1         165.2       0.5X
-timestamp + interval(m, d)                         1701           1704           4          5.9         170.1       0.5X
-timestamp + interval(m, d, ms)                     2033           2033           0          4.9         203.3       0.4X
-timestamp - interval(m)                            1778           1782           6          5.6         177.8       0.5X
-timestamp - interval(m, d)                         1839           1842           4          5.4         183.9       0.4X
-timestamp - interval(m, d, ms)                     2045           2050           8          4.9         204.5       0.4X
+date + interval(m)                                  836            853          23         12.0          83.6       1.0X
+date + interval(m, d)                               827            891         101         12.1          82.7       1.0X
+date + interval(m, d, ms)                          3338           3343           7          3.0         333.8       0.3X
+date - interval(m)                                  805            808           4         12.4          80.5       1.0X
+date - interval(m, d)                               823            826           4         12.1          82.3       1.0X
+date - interval(m, d, ms)                          3338           3344           8          3.0         333.8       0.3X
+timestamp + interval(m)                            1697           1700           4          5.9         169.7       0.5X
+timestamp + interval(m, d)                         1757           1761           5          5.7         175.7       0.5X
+timestamp + interval(m, d, ms)                     2124           2125           1          4.7         212.4       0.4X
+timestamp - interval(m)                            1861           1868          11          5.4         186.1       0.4X
+timestamp - interval(m, d)                         1940           1941           1          5.2         194.0       0.4X
+timestamp - interval(m, d, ms)                     2120           2124           5          4.7         212.0       0.4X
 
 
 ================================================================================================
 Extract components
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 cast to timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast to timestamp wholestage off                    197            199           2         50.7          19.7       1.0X
-cast to timestamp wholestage on                     206            214           6         48.6          20.6       1.0X
+cast to timestamp wholestage off                    199            200           1         50.2          19.9       1.0X
+cast to timestamp wholestage on                     214            228          26         46.8          21.4       0.9X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 year of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-year of timestamp wholestage off                    656            665          12         15.2          65.6       1.0X
-year of timestamp wholestage on                     604            613          14         16.6          60.4       1.1X
+year of timestamp wholestage off                    604            605           1         16.6          60.4       1.0X
+year of timestamp wholestage on                     609            616           7         16.4          60.9       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 quarter of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-quarter of timestamp wholestage off                 648            649           1         15.4          64.8       1.0X
-quarter of timestamp wholestage on                  622            630           5         16.1          62.2       1.0X
+quarter of timestamp wholestage off                 656            665          13         15.3          65.6       1.0X
+quarter of timestamp wholestage on                  640            649           7         15.6          64.0       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 month of timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-month of timestamp wholestage off                   623            626           4         16.0          62.3       1.0X
-month of timestamp wholestage on                    620            624           3         16.1          62.0       1.0X
+month of timestamp wholestage off                   626            626           1         16.0          62.6       1.0X
+month of timestamp wholestage on                    620            625           3         16.1          62.0       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 weekofyear of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-weekofyear of timestamp wholestage off             1002           1003           2         10.0         100.2       1.0X
-weekofyear of timestamp wholestage on               981            992          19         10.2          98.1       1.0X
+weekofyear of timestamp wholestage off             1028           1029           1          9.7         102.8       1.0X
+weekofyear of timestamp wholestage on              1032           1040           9          9.7         103.2       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 day of timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-day of timestamp wholestage off                     628            628           1         15.9          62.8       1.0X
-day of timestamp wholestage on                      621            626           4         16.1          62.1       1.0X
+day of timestamp wholestage off                     640            642           3         15.6          64.0       1.0X
+day of timestamp wholestage on                      626            630           4         16.0          62.6       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 dayofyear of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofyear of timestamp wholestage off               667            679          17         15.0          66.7       1.0X
-dayofyear of timestamp wholestage on                659            669           9         15.2          65.9       1.0X
+dayofyear of timestamp wholestage off               666            674          11         15.0          66.6       1.0X
+dayofyear of timestamp wholestage on                665            670           5         15.0          66.5       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 dayofmonth of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofmonth of timestamp wholestage off              630            644          21         15.9          63.0       1.0X
-dayofmonth of timestamp wholestage on               621            626           3         16.1          62.1       1.0X
+dayofmonth of timestamp wholestage off              638            638           0         15.7          63.8       1.0X
+dayofmonth of timestamp wholestage on               623            635          12         16.1          62.3       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 dayofweek of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofweek of timestamp wholestage off               792            794           2         12.6          79.2       1.0X
-dayofweek of timestamp wholestage on                786            787           2         12.7          78.6       1.0X
+dayofweek of timestamp wholestage off               796            797           1         12.6          79.6       1.0X
+dayofweek of timestamp wholestage on                798            802           2         12.5          79.8       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 weekday of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-weekday of timestamp wholestage off                 730            745          23         13.7          73.0       1.0X
-weekday of timestamp wholestage on                  730            734           3         13.7          73.0       1.0X
+weekday of timestamp wholestage off                 732            732           1         13.7          73.2       1.0X
+weekday of timestamp wholestage on                  736            738           2         13.6          73.6       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 hour of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-hour of timestamp wholestage off                    539            540           1         18.6          53.9       1.0X
-hour of timestamp wholestage on                     535            538           4         18.7          53.5       1.0X
+hour of timestamp wholestage off                    533            534           2         18.8          53.3       1.0X
+hour of timestamp wholestage on                     545            547           2         18.3          54.5       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 minute of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-minute of timestamp wholestage off                  537            540           5         18.6          53.7       1.0X
-minute of timestamp wholestage on                   538            544          13         18.6          53.8       1.0X
+minute of timestamp wholestage off                  535            539           5         18.7          53.5       1.0X
+minute of timestamp wholestage on                   545            547           3         18.4          54.5       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 second of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-second of timestamp wholestage off                  535            535           0         18.7          53.5       1.0X
-second of timestamp wholestage on                   546            550           2         18.3          54.6       1.0X
+second of timestamp wholestage off                  537            541           6         18.6          53.7       1.0X
+second of timestamp wholestage on                   547            552           5         18.3          54.7       1.0X
 
 
 ================================================================================================
 Current date and time
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 current_date:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-current_date wholestage off                         189            191           2         52.8          18.9       1.0X
-current_date wholestage on                          203            206           3         49.4          20.3       0.9X
+current_date wholestage off                         192            201          13         52.2          19.2       1.0X
+current_date wholestage on                          208            212           5         48.1          20.8       0.9X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 current_timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-current_timestamp wholestage off                    211            239          40         47.4          21.1       1.0X
-current_timestamp wholestage on                     210            220          11         47.5          21.0       1.0X
+current_timestamp wholestage off                    202            205           5         49.6          20.2       1.0X
+current_timestamp wholestage on                     212            217           5         47.2          21.2       1.0X
 
 
 ================================================================================================
 Date arithmetic
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 cast to date:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast to date wholestage off                         571            574           5         17.5          57.1       1.0X
-cast to date wholestage on                          574            577           4         17.4          57.4       1.0X
+cast to date wholestage off                         591            591           1         16.9          59.1       1.0X
+cast to date wholestage on                          584            586           1         17.1          58.4       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 last_day:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-last_day wholestage off                             659            661           3         15.2          65.9       1.0X
-last_day wholestage on                              659            668           6         15.2          65.9       1.0X
+last_day wholestage off                             651            653           3         15.4          65.1       1.0X
+last_day wholestage on                              672            678           6         14.9          67.2       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 next_day:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-next_day wholestage off                             607            609           3         16.5          60.7       1.0X
-next_day wholestage on                              610            616           4         16.4          61.0       1.0X
+next_day wholestage off                             622            623           2         16.1          62.2       1.0X
+next_day wholestage on                              611            617           4         16.4          61.1       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_add:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_add wholestage off                             559            562           4         17.9          55.9       1.0X
-date_add wholestage on                              572            579           9         17.5          57.2       1.0X
+date_add wholestage off                             564            569           8         17.7          56.4       1.0X
+date_add wholestage on                              564            570           4         17.7          56.4       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_sub:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_sub wholestage off                             566            577          15         17.7          56.6       1.0X
-date_sub wholestage on                              573            575           2         17.5          57.3       1.0X
+date_sub wholestage off                             568            568           1         17.6          56.8       1.0X
+date_sub wholestage on                              573            575           2         17.4          57.3       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 add_months:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-add_months wholestage off                           791            793           3         12.6          79.1       1.0X
-add_months wholestage on                            792            799           5         12.6          79.2       1.0X
+add_months wholestage off                           799            809          15         12.5          79.9       1.0X
+add_months wholestage on                            805            814          14         12.4          80.5       1.0X
 
 
 ================================================================================================
 Formatting dates
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 format date:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-format date wholestage off                         2790           2842          74          3.6         279.0       1.0X
-format date wholestage on                          2810           2831          13          3.6         281.0       1.0X
+format date wholestage off                         2907           2981         104          3.4         290.7       1.0X
+format date wholestage on                          2902           2909           9          3.4         290.2       1.0X
 
 
 ================================================================================================
 Formatting timestamps
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 from_unixtime:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-from_unixtime wholestage off                       2669           2671           4          3.7         266.9       1.0X
-from_unixtime wholestage on                        2648           2657           9          3.8         264.8       1.0X
+from_unixtime wholestage off                       2825           2825           0          3.5         282.5       1.0X
+from_unixtime wholestage on                        2667           2673           6          3.8         266.7       1.1X
 
 
 ================================================================================================
 Convert timestamps
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 from_utc_timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-from_utc_timestamp wholestage off                   681            686           7         14.7          68.1       1.0X
-from_utc_timestamp wholestage on                    719            725           7         13.9          71.9       0.9X
+from_utc_timestamp wholestage off                   733            733           1         13.6          73.3       1.0X
+from_utc_timestamp wholestage on                    698            701           8         14.3          69.8       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_utc_timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_utc_timestamp wholestage off                     824            826           2         12.1          82.4       1.0X
-to_utc_timestamp wholestage on                      856            860           3         11.7          85.6       1.0X
+to_utc_timestamp wholestage off                     863            866           4         11.6          86.3       1.0X
+to_utc_timestamp wholestage on                      873            876           2         11.5          87.3       1.0X
 
 
 ================================================================================================
 Intervals
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 cast interval:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast interval wholestage off                        245            248           5         40.9          24.5       1.0X
-cast interval wholestage on                         210            215           5         47.7          21.0       1.2X
+cast interval wholestage off                        285            286           1         35.1          28.5       1.0X
+cast interval wholestage on                         208            211           3         48.1          20.8       1.4X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 datediff:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-datediff wholestage off                             963            965           2         10.4          96.3       1.0X
-datediff wholestage on                              911            917           6         11.0          91.1       1.1X
+datediff wholestage off                             943            943           0         10.6          94.3       1.0X
+datediff wholestage on                              940            945           4         10.6          94.0       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 months_between:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-months_between wholestage off                      3058           3059           2          3.3         305.8       1.0X
-months_between wholestage on                       3073           3082           5          3.3         307.3       1.0X
+months_between wholestage off                      3189           3192           5          3.1         318.9       1.0X
+months_between wholestage on                       3184           3191           5          3.1         318.4       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 window:                                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-window wholestage off                               351            352           1          2.8         351.3       1.0X
-window wholestage on                                703            710           7          1.4         703.4       0.5X
+window wholestage off                               574            586          17          1.7         573.6       1.0X
+window wholestage on                                686            714          26          1.5         685.8       0.8X
 
 
 ================================================================================================
 Truncation
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc YEAR:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YEAR wholestage off                     1631           1633           3          6.1         163.1       1.0X
-date_trunc YEAR wholestage on                      1671           1679           5          6.0         167.1       1.0X
+date_trunc YEAR wholestage off                     1705           1708           4          5.9         170.5       1.0X
+date_trunc YEAR wholestage on                      1635           1639           3          6.1         163.5       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc YYYY:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YYYY wholestage off                     1635           1643          11          6.1         163.5       1.0X
-date_trunc YYYY wholestage on                      1679           1682           2          6.0         167.9       1.0X
+date_trunc YYYY wholestage off                     1709           1710           2          5.9         170.9       1.0X
+date_trunc YYYY wholestage on                      1636           1639           3          6.1         163.6       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc YY:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YY wholestage off                       1631           1635           5          6.1         163.1       1.0X
-date_trunc YY wholestage on                        1681           1683           1          5.9         168.1       1.0X
+date_trunc YY wholestage off                       1708           1710           2          5.9         170.8       1.0X
+date_trunc YY wholestage on                        1635           1637           2          6.1         163.5       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MON:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MON wholestage off                      1656           1657           2          6.0         165.6       1.0X
-date_trunc MON wholestage on                       1626           1630           3          6.2         162.6       1.0X
+date_trunc MON wholestage off                      1795           1797           3          5.6         179.5       1.0X
+date_trunc MON wholestage on                       1633           1636           3          6.1         163.3       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MONTH:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MONTH wholestage off                    1655           1655           1          6.0         165.5       1.0X
-date_trunc MONTH wholestage on                     1625           1629           4          6.2         162.5       1.0X
+date_trunc MONTH wholestage off                    1795           1795           0          5.6         179.5       1.0X
+date_trunc MONTH wholestage on                     1632           1633           1          6.1         163.2       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MM:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MM wholestage off                       1655           1658           4          6.0         165.5       1.0X
-date_trunc MM wholestage on                        1624           1633          11          6.2         162.4       1.0X
+date_trunc MM wholestage off                       1797           1798           0          5.6         179.7       1.0X
+date_trunc MM wholestage on                        1633           1635           3          6.1         163.3       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc DAY:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc DAY wholestage off                      1194           1195           2          8.4         119.4       1.0X
-date_trunc DAY wholestage on                       1260           1264           5          7.9         126.0       0.9X
+date_trunc DAY wholestage off                       571            572           1         17.5          57.1       1.0X
+date_trunc DAY wholestage on                        541            546           5         18.5          54.1       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc DD:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc DD wholestage off                       1195           1197           4          8.4         119.5       1.0X
-date_trunc DD wholestage on                        1260           1262           1          7.9         126.0       0.9X
+date_trunc DD wholestage off                        572            572           0         17.5          57.2       1.0X
+date_trunc DD wholestage on                         543            550           5         18.4          54.3       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc HOUR:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc HOUR wholestage off                     1197           1199           4          8.4         119.7       1.0X
-date_trunc HOUR wholestage on                      1149           1152           2          8.7         114.9       1.0X
+date_trunc HOUR wholestage off                      546            547           1         18.3          54.6       1.0X
+date_trunc HOUR wholestage on                       516            518           2         19.4          51.6       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MINUTE:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MINUTE wholestage off                   1290           1290           0          7.8         129.0       1.0X
-date_trunc MINUTE wholestage on                    1216           1217           1          8.2         121.6       1.1X
+date_trunc MINUTE wholestage off                    547            552           7         18.3          54.7       1.0X
+date_trunc MINUTE wholestage on                     498            502           3         20.1          49.8       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc SECOND:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc SECOND wholestage off                    309            309           1         32.4          30.9       1.0X
-date_trunc SECOND wholestage on                     274            277           2         36.5          27.4       1.1X
+date_trunc SECOND wholestage off                    308            308           0         32.4          30.8       1.0X
+date_trunc SECOND wholestage on                     269            273           3         37.1          26.9       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc WEEK:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc WEEK wholestage off                     1523           1543          27          6.6         152.3       1.0X
-date_trunc WEEK wholestage on                      1644           1645           2          6.1         164.4       0.9X
+date_trunc WEEK wholestage off                     1619           1622           4          6.2         161.9       1.0X
+date_trunc WEEK wholestage on                      1547           1551           3          6.5         154.7       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc QUARTER:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc QUARTER wholestage off                  2130           2141          15          4.7         213.0       1.0X
-date_trunc QUARTER wholestage on                   1996           2008          24          5.0         199.6       1.1X
+date_trunc QUARTER wholestage off                  2163           2171          12          4.6         216.3       1.0X
+date_trunc QUARTER wholestage on                   2016           2040          16          5.0         201.6       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc year:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc year wholestage off                           767            767           1         13.0          76.7       1.0X
-trunc year wholestage on                            728            732           4         13.7          72.8       1.1X
+trunc year wholestage off                           781            782           1         12.8          78.1       1.0X
+trunc year wholestage on                            736            739           2         13.6          73.6       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc yyyy:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc yyyy wholestage off                           766            766           0         13.1          76.6       1.0X
-trunc yyyy wholestage on                            727            731           3         13.7          72.7       1.1X
+trunc yyyy wholestage off                           783            783           0         12.8          78.3       1.0X
+trunc yyyy wholestage on                            735            738           2         13.6          73.5       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc yy:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc yy wholestage off                             766            767           0         13.0          76.6       1.0X
-trunc yy wholestage on                              726            730           3         13.8          72.6       1.1X
+trunc yy wholestage off                             782            785           3         12.8          78.2       1.0X
+trunc yy wholestage on                              737            739           2         13.6          73.7       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc mon:                                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc mon wholestage off                            727            728           1         13.8          72.7       1.0X
-trunc mon wholestage on                             696            703           7         14.4          69.6       1.0X
+trunc mon wholestage off                            750            750           0         13.3          75.0       1.0X
+trunc mon wholestage on                             688            689           2         14.5          68.8       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc month:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc month wholestage off                          721            724           4         13.9          72.1       1.0X
-trunc month wholestage on                           693            705          11         14.4          69.3       1.0X
+trunc month wholestage off                          751            765          20         13.3          75.1       1.0X
+trunc month wholestage on                           686            689           3         14.6          68.6       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc mm:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc mm wholestage off                             728            728           0         13.7          72.8       1.0X
-trunc mm wholestage on                              700            706           6         14.3          70.0       1.0X
+trunc mm wholestage off                             751            751           1         13.3          75.1       1.0X
+trunc mm wholestage on                              685            688           4         14.6          68.5       1.1X
 
 
 ================================================================================================
 Parsing
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to timestamp str:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to timestamp str wholestage off                      89             91           2         11.2          89.5       1.0X
-to timestamp str wholestage on                       86             97          14         11.6          86.1       1.0X
+to timestamp str wholestage off                      92             93           2         10.9          91.9       1.0X
+to timestamp str wholestage on                       84             86           1         11.9          83.9       1.1X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_timestamp:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_timestamp wholestage off                         548            548           0          1.8         548.4       1.0X
-to_timestamp wholestage on                          571            575           4          1.8         571.4       1.0X
+to_timestamp wholestage off                         611            611           1          1.6         610.9       1.0X
+to_timestamp wholestage on                          601            603           2          1.7         601.3       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_unix_timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_unix_timestamp wholestage off                    574            574           0          1.7         573.8       1.0X
-to_unix_timestamp wholestage on                     569            576          10          1.8         569.3       1.0X
+to_unix_timestamp wholestage off                    609            611           4          1.6         608.6       1.0X
+to_unix_timestamp wholestage on                     593            595           2          1.7         593.4       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to date str:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to date str wholestage off                          106            108           4          9.4         105.9       1.0X
-to date str wholestage on                           105            106           1          9.5         105.2       1.0X
+to date str wholestage off                          111            114           4          9.0         110.9       1.0X
+to date str wholestage on                           110            112           2          9.1         109.9       1.0X
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_date:                                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_date wholestage off                              543            545           2          1.8         543.5       1.0X
-to_date wholestage on                               555            556           1          1.8         554.9       1.0X
+to_date wholestage off                              620            622           3          1.6         620.0       1.0X
+to_date wholestage on                               611            613           2          1.6         611.3       1.0X
 
 
 ================================================================================================
 Conversion from/to external types
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 To/from Java's date-time:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-From java.sql.Date                                  292            293           2         17.1          58.4       1.0X
-From java.time.LocalDate                            227            229           3         22.0          45.4       1.3X
-Collect java.sql.Date                              1382           1410          28          3.6         276.4       0.2X
-Collect java.time.LocalDate                        1033           1171         123          4.8         206.7       0.3X
-From java.sql.Timestamp                             229            246          16         21.9          45.7       1.3X
-From java.time.Instant                              201            217          14         24.9          40.1       1.5X
-Collect longs                                      1030           1170         133          4.9         206.0       0.3X
-Collect java.sql.Timestamp                          946           1219         238          5.3         189.2       0.3X
-Collect java.time.Instant                          1036           1113         107          4.8         207.2       0.3X
-java.sql.Date to Hive string                       3823           3971         129          1.3         764.6       0.1X
-java.time.LocalDate to Hive string                 3058           3188         190          1.6         611.7       0.1X
-java.sql.Timestamp to Hive string                  6532           6816         249          0.8        1306.3       0.0X
-java.time.Instant to Hive string                   4228           4275          51          1.2         845.6       0.1X
+From java.sql.Date                                  290            291           1         17.2          58.1       1.0X
+From java.time.LocalDate                            224            227           2         22.3          44.8       1.3X
+Collect java.sql.Date                              1274           1300          30          3.9         254.8       0.2X
+Collect java.time.LocalDate                         939           1088         158          5.3         187.9       0.3X
+From java.sql.Timestamp                             248            257           8         20.2          49.6       1.2X
+From java.time.Instant                              195            205          16         25.7          38.9       1.5X
+Collect longs                                      1042           1067          22          4.8         208.3       0.3X
+Collect java.sql.Timestamp                         1136           1226          84          4.4         227.3       0.3X
+Collect java.time.Instant                           925           1042         101          5.4         185.0       0.3X
+java.sql.Date to Hive string                       4507           4562          81          1.1         901.3       0.1X
+java.time.LocalDate to Hive string                 3638           3757         103          1.4         727.6       0.1X
+java.sql.Timestamp to Hive string                  7280           7445         169          0.7        1456.0       0.0X
+java.time.Instant to Hive string                   4745           4883         136          1.1         949.1       0.1X
 
 
diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt
index f60a04674b905..98f63cc199ee2 100644
--- a/sql/core/benchmarks/DateTimeBenchmark-results.txt
+++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt
@@ -2,460 +2,460 @@
 datetime +/- interval
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 datetime +/- interval:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date + interval(m)                                  991           1086         134         10.1          99.1       1.0X
-date + interval(m, d)                               991           1006          21         10.1          99.1       1.0X
-date + interval(m, d, ms)                          3879           3886           9          2.6         387.9       0.3X
-date - interval(m)                                  978            981           5         10.2          97.8       1.0X
-date - interval(m, d)                              1007           1008           2          9.9         100.7       1.0X
-date - interval(m, d, ms)                          3911           3917           8          2.6         391.1       0.3X
-timestamp + interval(m)                            1659           1660           2          6.0         165.9       0.6X
-timestamp + interval(m, d)                         1721           1723           2          5.8         172.1       0.6X
-timestamp + interval(m, d, ms)                     2027           2039          17          4.9         202.7       0.5X
-timestamp - interval(m)                            1756           1762           9          5.7         175.6       0.6X
-timestamp - interval(m, d)                         1854           1860           9          5.4         185.4       0.5X
-timestamp - interval(m, d, ms)                     2022           2023           0          4.9         202.2       0.5X
+date + interval(m)                                 1027           1051          34          9.7         102.7       1.0X
+date + interval(m, d)                               996            996           1         10.0          99.6       1.0X
+date + interval(m, d, ms)                          4122           4129          10          2.4         412.2       0.2X
+date - interval(m)                                  974            981           5         10.3          97.4       1.1X
+date - interval(m, d)                              1001           1006           7         10.0         100.1       1.0X
+date - interval(m, d, ms)                          4146           4161          22          2.4         414.6       0.2X
+timestamp + interval(m)                            1819           1819           0          5.5         181.9       0.6X
+timestamp + interval(m, d)                         1866           1868           4          5.4         186.6       0.6X
+timestamp + interval(m, d, ms)                     2113           2117           5          4.7         211.3       0.5X
+timestamp - interval(m)                            1826           1834          11          5.5         182.6       0.6X
+timestamp - interval(m, d)                         1904           1905           1          5.3         190.4       0.5X
+timestamp - interval(m, d, ms)                     2110           2111           1          4.7         211.0       0.5X
 
 
 ================================================================================================
 Extract components
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 cast to timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast to timestamp wholestage off                    201            202           1         49.8          20.1       1.0X
-cast to timestamp wholestage on                     214            230          23         46.8          21.4       0.9X
+cast to timestamp wholestage off                    198            198           0         50.4          19.8       1.0X
+cast to timestamp wholestage on                     213            224          16         47.0          21.3       0.9X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 year of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-year of timestamp wholestage off                    778            783           7         12.8          77.8       1.0X
-year of timestamp wholestage on                     784            788           2         12.8          78.4       1.0X
+year of timestamp wholestage off                    761            764           4         13.1          76.1       1.0X
+year of timestamp wholestage on                     759            764           5         13.2          75.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 quarter of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-quarter of timestamp wholestage off                 803            806           5         12.5          80.3       1.0X
-quarter of timestamp wholestage on                  800            806           4         12.5          80.0       1.0X
+quarter of timestamp wholestage off                 783            784           1         12.8          78.3       1.0X
+quarter of timestamp wholestage on                  787            794           6         12.7          78.7       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 month of timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-month of timestamp wholestage off                   787            798          15         12.7          78.7       1.0X
-month of timestamp wholestage on                    781            791          10         12.8          78.1       1.0X
+month of timestamp wholestage off                   767            767           0         13.0          76.7       1.0X
+month of timestamp wholestage on                    771            773           1         13.0          77.1       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 weekofyear of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-weekofyear of timestamp wholestage off             1126           1131           7          8.9         112.6       1.0X
-weekofyear of timestamp wholestage on              1152           1161          12          8.7         115.2       1.0X
+weekofyear of timestamp wholestage off             1030           1041          16          9.7         103.0       1.0X
+weekofyear of timestamp wholestage on              1041           1052          12          9.6         104.1       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 day of timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-day of timestamp wholestage off                     786            789           5         12.7          78.6       1.0X
-day of timestamp wholestage on                      781            781           0         12.8          78.1       1.0X
+day of timestamp wholestage off                     761            765           6         13.1          76.1       1.0X
+day of timestamp wholestage on                      760            763           2         13.2          76.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 dayofyear of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofyear of timestamp wholestage off               813            814           2         12.3          81.3       1.0X
-dayofyear of timestamp wholestage on                805            807           2         12.4          80.5       1.0X
+dayofyear of timestamp wholestage off               798            798           0         12.5          79.8       1.0X
+dayofyear of timestamp wholestage on                799            802           3         12.5          79.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 dayofmonth of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofmonth of timestamp wholestage off              791            794           5         12.6          79.1       1.0X
-dayofmonth of timestamp wholestage on               787            790           3         12.7          78.7       1.0X
+dayofmonth of timestamp wholestage off              772            774           2         12.9          77.2       1.0X
+dayofmonth of timestamp wholestage on               763            770           9         13.1          76.3       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 dayofweek of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-dayofweek of timestamp wholestage off               935            938           5         10.7          93.5       1.0X
-dayofweek of timestamp wholestage on                930            934           5         10.8          93.0       1.0X
+dayofweek of timestamp wholestage off               903            904           1         11.1          90.3       1.0X
+dayofweek of timestamp wholestage on                910            915           5         11.0          91.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 weekday of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-weekday of timestamp wholestage off                 887            889           2         11.3          88.7       1.0X
-weekday of timestamp wholestage on                  885            892          10         11.3          88.5       1.0X
+weekday of timestamp wholestage off                 861            862           2         11.6          86.1       1.0X
+weekday of timestamp wholestage on                  869            880          16         11.5          86.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 hour of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-hour of timestamp wholestage off                    611            615           5         16.4          61.1       1.0X
-hour of timestamp wholestage on                     610            617           9         16.4          61.0       1.0X
+hour of timestamp wholestage off                    598            600           4         16.7          59.8       1.0X
+hour of timestamp wholestage on                     602            609           6         16.6          60.2       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 minute of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-minute of timestamp wholestage off                  609            613           6         16.4          60.9       1.0X
-minute of timestamp wholestage on                   611            618           9         16.4          61.1       1.0X
+minute of timestamp wholestage off                  598            599           1         16.7          59.8       1.0X
+minute of timestamp wholestage on                   603            611          11         16.6          60.3       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 second of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-second of timestamp wholestage off                  613            613           1         16.3          61.3       1.0X
-second of timestamp wholestage on                   615            617           2         16.3          61.5       1.0X
+second of timestamp wholestage off                  604            604           0         16.6          60.4       1.0X
+second of timestamp wholestage on                   604            609           4         16.6          60.4       1.0X
 
 
 ================================================================================================
 Current date and time
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 current_date:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-current_date wholestage off                         188            188           1         53.3          18.8       1.0X
-current_date wholestage on                          216            228          13         46.3          21.6       0.9X
+current_date wholestage off                         182            182           0         55.0          18.2       1.0X
+current_date wholestage on                          213            217           3         47.0          21.3       0.9X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 current_timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-current_timestamp wholestage off                    198            198           0         50.5          19.8       1.0X
-current_timestamp wholestage on                     224            238          20         44.7          22.4       0.9X
+current_timestamp wholestage off                    190            193           4         52.7          19.0       1.0X
+current_timestamp wholestage on                     216            232          12         46.4          21.6       0.9X
 
 
 ================================================================================================
 Date arithmetic
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 cast to date:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast to date wholestage off                         684            684           0         14.6          68.4       1.0X
-cast to date wholestage on                          682            684           2         14.7          68.2       1.0X
+cast to date wholestage off                         659            661           4         15.2          65.9       1.0X
+cast to date wholestage on                          672            675           4         14.9          67.2       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 last_day:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-last_day wholestage off                             798            799           1         12.5          79.8       1.0X
-last_day wholestage on                              801            805           3         12.5          80.1       1.0X
+last_day wholestage off                             770            771           2         13.0          77.0       1.0X
+last_day wholestage on                              772            777           5         13.0          77.2       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 next_day:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-next_day wholestage off                             713            714           1         14.0          71.3       1.0X
-next_day wholestage on                              714            719           7         14.0          71.4       1.0X
+next_day wholestage off                             690            703          18         14.5          69.0       1.0X
+next_day wholestage on                              706            709           2         14.2          70.6       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_add:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_add wholestage off                             668            669           1         15.0          66.8       1.0X
-date_add wholestage on                              693            706          16         14.4          69.3       1.0X
+date_add wholestage off                             644            645           1         15.5          64.4       1.0X
+date_add wholestage on                              645            648           2         15.5          64.5       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_sub:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_sub wholestage off                             682            683           2         14.7          68.2       1.0X
-date_sub wholestage on                              692            694           2         14.4          69.2       1.0X
+date_sub wholestage off                             637            640           4         15.7          63.7       1.0X
+date_sub wholestage on                              650            657           8         15.4          65.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 add_months:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-add_months wholestage off                           934            935           1         10.7          93.4       1.0X
-add_months wholestage on                            935            942           6         10.7          93.5       1.0X
+add_months wholestage off                           908            916          12         11.0          90.8       1.0X
+add_months wholestage on                            911            915           5         11.0          91.1       1.0X
 
 
 ================================================================================================
 Formatting dates
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 format date:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-format date wholestage off                         3548           3554           9          2.8         354.8       1.0X
-format date wholestage on                          3716           3721           4          2.7         371.6       1.0X
+format date wholestage off                         3288           3294           8          3.0         328.8       1.0X
+format date wholestage on                          3324           3363          51          3.0         332.4       1.0X
 
 
 ================================================================================================
 Formatting timestamps
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 from_unixtime:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-from_unixtime wholestage off                       3352           3355           4          3.0         335.2       1.0X
-from_unixtime wholestage on                        3530           3540           7          2.8         353.0       0.9X
+from_unixtime wholestage off                       3654           3656           2          2.7         365.4       1.0X
+from_unixtime wholestage on                        3622           3633          12          2.8         362.2       1.0X
 
 
 ================================================================================================
 Convert timestamps
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 from_utc_timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-from_utc_timestamp wholestage off                   735            737           3         13.6          73.5       1.0X
-from_utc_timestamp wholestage on                    844            849           6         11.9          84.4       0.9X
+from_utc_timestamp wholestage off                   721            724           5         13.9          72.1       1.0X
+from_utc_timestamp wholestage on                    842            843           1         11.9          84.2       0.9X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_utc_timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_utc_timestamp wholestage off                    1093           1095           3          9.2         109.3       1.0X
-to_utc_timestamp wholestage on                     1146           1159          18          8.7         114.6       1.0X
+to_utc_timestamp wholestage off                    1175           1186          17          8.5         117.5       1.0X
+to_utc_timestamp wholestage on                     1081           1092          14          9.3         108.1       1.1X
 
 
 ================================================================================================
 Intervals
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 cast interval:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cast interval wholestage off                        222            224           3         45.1          22.2       1.0X
-cast interval wholestage on                         219            221           2         45.6          21.9       1.0X
+cast interval wholestage off                        230            231           2         43.5          23.0       1.0X
+cast interval wholestage on                         212            215           4         47.2          21.2       1.1X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 datediff:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-datediff wholestage off                            1168           1170           4          8.6         116.8       1.0X
-datediff wholestage on                             1151           1153           2          8.7         115.1       1.0X
+datediff wholestage off                            1118           1121           4          8.9         111.8       1.0X
+datediff wholestage on                             1174           1185          20          8.5         117.4       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 months_between:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-months_between wholestage off                      3295           3308          18          3.0         329.5       1.0X
-months_between wholestage on                       3259           3262           4          3.1         325.9       1.0X
+months_between wholestage off                      3213           3217           5          3.1         321.3       1.0X
+months_between wholestage on                       3238           3248          16          3.1         323.8       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 window:                                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-window wholestage off                               394            405          16          2.5         393.6       1.0X
-window wholestage on                                651            668          12          1.5         651.4       0.6X
+window wholestage off                               632            647          21          1.6         632.3       1.0X
+window wholestage on                                629            652          17          1.6         629.0       1.0X
 
 
 ================================================================================================
 Truncation
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc YEAR:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YEAR wholestage off                     1751           1756           6          5.7         175.1       1.0X
-date_trunc YEAR wholestage on                      1680           1683           2          6.0         168.0       1.0X
+date_trunc YEAR wholestage off                     1686           1687           1          5.9         168.6       1.0X
+date_trunc YEAR wholestage on                      1747           1749           2          5.7         174.7       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc YYYY:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YYYY wholestage off                     1751           1751           0          5.7         175.1       1.0X
-date_trunc YYYY wholestage on                      1680           1684           7          6.0         168.0       1.0X
+date_trunc YYYY wholestage off                     1682           1684           2          5.9         168.2       1.0X
+date_trunc YYYY wholestage on                      1746           1749           2          5.7         174.6       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc YY:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc YY wholestage off                       1755           1756           1          5.7         175.5       1.0X
-date_trunc YY wholestage on                        1680           1683           3          6.0         168.0       1.0X
+date_trunc YY wholestage off                       1683           1683           1          5.9         168.3       1.0X
+date_trunc YY wholestage on                        1747           1749           3          5.7         174.7       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MON:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MON wholestage off                      1762           1773          16          5.7         176.2       1.0X
-date_trunc MON wholestage on                       1741           1749          11          5.7         174.1       1.0X
+date_trunc MON wholestage off                      1718           1727          13          5.8         171.8       1.0X
+date_trunc MON wholestage on                       1710           1716           6          5.8         171.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MONTH:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MONTH wholestage off                    1757           1767          15          5.7         175.7       1.0X
-date_trunc MONTH wholestage on                     1746           1751           9          5.7         174.6       1.0X
+date_trunc MONTH wholestage off                    1721           1723           3          5.8         172.1       1.0X
+date_trunc MONTH wholestage on                     1710           1730          32          5.8         171.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MM:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MM wholestage off                       1760           1761           1          5.7         176.0       1.0X
-date_trunc MM wholestage on                        1743           1746           4          5.7         174.3       1.0X
+date_trunc MM wholestage off                       1721           1721           0          5.8         172.1       1.0X
+date_trunc MM wholestage on                        1710           1719          10          5.8         171.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc DAY:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc DAY wholestage off                      1309           1309           1          7.6         130.9       1.0X
-date_trunc DAY wholestage on                       1259           1261           2          7.9         125.9       1.0X
+date_trunc DAY wholestage off                       603            627          33         16.6          60.3       1.0X
+date_trunc DAY wholestage on                        561            567           9         17.8          56.1       1.1X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc DD:                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc DD wholestage off                       1310           1312           3          7.6         131.0       1.0X
-date_trunc DD wholestage on                        1259           1262           2          7.9         125.9       1.0X
+date_trunc DD wholestage off                        605            609           5         16.5          60.5       1.0X
+date_trunc DD wholestage on                         562            563           1         17.8          56.2       1.1X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc HOUR:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc HOUR wholestage off                     1309           1312           5          7.6         130.9       1.0X
-date_trunc HOUR wholestage on                      1275           1281           5          7.8         127.5       1.0X
+date_trunc HOUR wholestage off                      605            608           4         16.5          60.5       1.0X
+date_trunc HOUR wholestage on                       563            570          12         17.8          56.3       1.1X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc MINUTE:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc MINUTE wholestage off                   1339           1342           3          7.5         133.9       1.0X
-date_trunc MINUTE wholestage on                    1299           1301           2          7.7         129.9       1.0X
+date_trunc MINUTE wholestage off                    605            605           0         16.5          60.5       1.0X
+date_trunc MINUTE wholestage on                     560            561           1         17.9          56.0       1.1X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc SECOND:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc SECOND wholestage off                    320            320           0         31.3          32.0       1.0X
-date_trunc SECOND wholestage on                     275            277           3         36.4          27.5       1.2X
+date_trunc SECOND wholestage off                    325            326           1         30.8          32.5       1.0X
+date_trunc SECOND wholestage on                     281            284           2         35.6          28.1       1.2X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc WEEK:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc WEEK wholestage off                     1607           1616          12          6.2         160.7       1.0X
-date_trunc WEEK wholestage on                      1577           1583           5          6.3         157.7       1.0X
+date_trunc WEEK wholestage off                     1580           1583           4          6.3         158.0       1.0X
+date_trunc WEEK wholestage on                      1599           1601           2          6.3         159.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 date_trunc QUARTER:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-date_trunc QUARTER wholestage off                  2037           2042           6          4.9         203.7       1.0X
-date_trunc QUARTER wholestage on                   2031           2044          20          4.9         203.1       1.0X
+date_trunc QUARTER wholestage off                  2158           2168          13          4.6         215.8       1.0X
+date_trunc QUARTER wholestage on                   2138           2140           1          4.7         213.8       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc year:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc year wholestage off                           943            943           0         10.6          94.3       1.0X
-trunc year wholestage on                            894            899           5         11.2          89.4       1.1X
+trunc year wholestage off                           913            914           2         11.0          91.3       1.0X
+trunc year wholestage on                            910            917           8         11.0          91.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc yyyy:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc yyyy wholestage off                           941            941           0         10.6          94.1       1.0X
-trunc yyyy wholestage on                            895            902          10         11.2          89.5       1.1X
+trunc yyyy wholestage off                           913            914           1         11.0          91.3       1.0X
+trunc yyyy wholestage on                            913            921           7         10.9          91.3       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc yy:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc yy wholestage off                             941            943           3         10.6          94.1       1.0X
-trunc yy wholestage on                              894            896           1         11.2          89.4       1.1X
+trunc yy wholestage off                             912            913           1         11.0          91.2       1.0X
+trunc yy wholestage on                              911            913           2         11.0          91.1       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc mon:                                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc mon wholestage off                            914            914           1         10.9          91.4       1.0X
-trunc mon wholestage on                             869            872           3         11.5          86.9       1.1X
+trunc mon wholestage off                            889            890           1         11.2          88.9       1.0X
+trunc mon wholestage on                             869            879          19         11.5          86.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc month:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc month wholestage off                          914            915           2         10.9          91.4       1.0X
-trunc month wholestage on                           870            877          11         11.5          87.0       1.1X
+trunc month wholestage off                          889            890           2         11.2          88.9       1.0X
+trunc month wholestage on                           869            873           4         11.5          86.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 trunc mm:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-trunc mm wholestage off                             914            915           1         10.9          91.4       1.0X
-trunc mm wholestage on                              867            871           4         11.5          86.7       1.1X
+trunc mm wholestage off                             888            900          17         11.3          88.8       1.0X
+trunc mm wholestage on                              870            876           6         11.5          87.0       1.0X
 
 
 ================================================================================================
 Parsing
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to timestamp str:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to timestamp str wholestage off                     113            115           2          8.8         113.2       1.0X
-to timestamp str wholestage on                       99            102           3         10.1          98.6       1.1X
+to timestamp str wholestage off                     105            105           1          9.6         104.6       1.0X
+to timestamp str wholestage on                      100            104           6         10.0          99.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_timestamp:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_timestamp wholestage off                         740            741           1          1.4         740.4       1.0X
-to_timestamp wholestage on                          715            718           3          1.4         714.7       1.0X
+to_timestamp wholestage off                         796            800           6          1.3         795.8       1.0X
+to_timestamp wholestage on                          780            782           2          1.3         779.8       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_unix_timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_unix_timestamp wholestage off                    738            739           1          1.4         737.6       1.0X
-to_unix_timestamp wholestage on                     723            724           2          1.4         723.0       1.0X
+to_unix_timestamp wholestage off                    789            789           0          1.3         788.8       1.0X
+to_unix_timestamp wholestage on                     765            766           2          1.3         764.9       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to date str:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to date str wholestage off                          137            137           0          7.3         137.0       1.0X
-to date str wholestage on                           130            133           4          7.7         130.2       1.1X
+to date str wholestage off                          133            134           1          7.5         133.5       1.0X
+to date str wholestage on                           132            135           2          7.6         131.6       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 to_date:                                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-to_date wholestage off                              643            643           1          1.6         642.9       1.0X
-to_date wholestage on                               640            640           1          1.6         639.5       1.0X
+to_date wholestage off                              671            672           1          1.5         671.1       1.0X
+to_date wholestage on                               668            679          20          1.5         667.5       1.0X
 
 
 ================================================================================================
 Conversion from/to external types
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 To/from Java's date-time:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-From java.sql.Date                                  283            284           2         17.7          56.6       1.0X
-From java.time.LocalDate                            245            245           0         20.4          48.9       1.2X
-Collect java.sql.Date                              1196           1329         154          4.2         239.3       0.2X
-Collect java.time.LocalDate                         838           1051         221          6.0         167.6       0.3X
-From java.sql.Timestamp                             230            232           4         21.8          45.9       1.2X
-From java.time.Instant                              213            218           5         23.5          42.6       1.3X
-Collect longs                                      1000           1051          45          5.0         200.0       0.3X
-Collect java.sql.Timestamp                         1073           1234         170          4.7         214.7       0.3X
-Collect java.time.Instant                           963           1128         176          5.2         192.5       0.3X
-java.sql.Date to Hive string                       3889           3971         133          1.3         777.7       0.1X
-java.time.LocalDate to Hive string                 3186           3212          30          1.6         637.2       0.1X
-java.sql.Timestamp to Hive string                  6586           6653         111          0.8        1317.1       0.0X
-java.time.Instant to Hive string                   4888           4995          93          1.0         977.7       0.1X
+From java.sql.Date                                  288            289           1         17.4          57.6       1.0X
+From java.time.LocalDate                            244            246           2         20.5          48.9       1.2X
+Collect java.sql.Date                              1255           1337          72          4.0         251.0       0.2X
+Collect java.time.LocalDate                        1077           1099          20          4.6         215.3       0.3X
+From java.sql.Timestamp                             236            238           3         21.2          47.2       1.2X
+From java.time.Instant                              183            185           2         27.4          36.6       1.6X
+Collect longs                                       952           1050          98          5.3         190.3       0.3X
+Collect java.sql.Timestamp                         1002           1113          96          5.0         200.5       0.3X
+Collect java.time.Instant                           950           1050         124          5.3         190.0       0.3X
+java.sql.Date to Hive string                       4735           4742          10          1.1         946.9       0.1X
+java.time.LocalDate to Hive string                 3939           4073         160          1.3         787.7       0.1X
+java.sql.Timestamp to Hive string                  7243           7246           3          0.7        1448.6       0.0X
+java.time.Instant to Hive string                   5813           5958         141          0.9        1162.5       0.0X
 
 
diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt
new file mode 100644
index 0000000000000..86c0201d35c11
--- /dev/null
+++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt
@@ -0,0 +1,84 @@
+================================================================================================
+Identity Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Identity Updaters:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+BooleanUpdater                                        0              0           0      16946.4           0.1       1.0X
+ByteUpdater (INT32 -> Byte)                           0              0           0       3743.2           0.3       0.2X
+ShortUpdater (INT32 -> Short)                         1              1           0       1676.4           0.6       0.1X
+IntegerUpdater                                        0              0           0      10258.9           0.1       0.6X
+LongUpdater                                           0              0           0       5140.3           0.2       0.3X
+FloatUpdater                                          0              0           0      10259.8           0.1       0.6X
+DoubleUpdater                                         0              0           0       5130.4           0.2       0.3X
+BinaryUpdater                                        15             15           0         70.4          14.2       0.0X
+
+
+================================================================================================
+Type-converting Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Type-converting Updaters:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+---------------------------------------------------------------------------------------------------------------------------
+IntegerToLongUpdater                                     2              2           0        530.9           1.9       1.0X
+IntegerToDoubleUpdater                                   2              2           0        531.3           1.9       1.0X
+FloatToDoubleUpdater                                     2              2           0        489.7           2.0       0.9X
+DateToTimestampNTZUpdater                               29             29           0         36.2          27.6       0.1X
+DowncastLongUpdater (INT64 -> Decimal(9,2))              2              2           0        455.7           2.2       0.9X
+
+
+================================================================================================
+Rebase Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Rebase Updaters:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------
+IntegerWithRebaseUpdater (DATE legacy)                       0              0           0       3644.6           0.3       1.0X
+LongWithRebaseUpdater (TIMESTAMP_MICROS legacy)              0              0           0       2663.3           0.4       0.7X
+LongAsMicrosUpdater (TIMESTAMP_MILLIS)                       2              3           0        420.0           2.4       0.1X
+
+
+================================================================================================
+Unsigned Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Unsigned Updaters:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+UnsignedIntegerUpdater (UINT32 -> Long)                    0              0           0       5974.2           0.2       1.0X
+UnsignedLongUpdater (UINT64 -> Decimal(20,0))             17             18           0         60.3          16.6       0.0X
+
+
+================================================================================================
+Decimal Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Decimal Updaters:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+IntegerToDecimalUpdater                               0              0           0      10257.8           0.1       1.0X
+LongToDecimalUpdater                                  0              0           0       5133.7           0.2       0.5X
+FixedLenByteArrayToDecimalUpdater                    21             21           0         50.2          19.9       0.0X
+
+
+================================================================================================
+FixedLenByteArray Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+FixedLenByteArray Updaters:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+---------------------------------------------------------------------------------------------------------------------------------------
+FixedLenByteArrayUpdater (len=16 -> Binary)                         20             21           1         51.5          19.4       1.0X
+FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2))                7              7           0        160.1           6.2       3.1X
+FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4))              8              8           0        133.2           7.5       2.6X
+
+
diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt
new file mode 100644
index 0000000000000..d4aaaca05263a
--- /dev/null
+++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt
@@ -0,0 +1,84 @@
+================================================================================================
+Identity Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Identity Updaters:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+BooleanUpdater                                        0              0           0      17177.7           0.1       1.0X
+ByteUpdater (INT32 -> Byte)                           0              0           0       3680.4           0.3       0.2X
+ShortUpdater (INT32 -> Short)                         1              1           0       1664.2           0.6       0.1X
+IntegerUpdater                                        0              0           0      10311.6           0.1       0.6X
+LongUpdater                                           0              0           0       5153.5           0.2       0.3X
+FloatUpdater                                          0              0           0      10313.6           0.1       0.6X
+DoubleUpdater                                         0              0           0       5157.8           0.2       0.3X
+BinaryUpdater                                        16             16           0         67.6          14.8       0.0X
+
+
+================================================================================================
+Type-converting Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Type-converting Updaters:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+---------------------------------------------------------------------------------------------------------------------------
+IntegerToLongUpdater                                     2              2           0        454.8           2.2       1.0X
+IntegerToDoubleUpdater                                   2              2           0        454.5           2.2       1.0X
+FloatToDoubleUpdater                                     2              2           0        483.4           2.1       1.1X
+DateToTimestampNTZUpdater                               29             29           0         36.6          27.3       0.1X
+DowncastLongUpdater (INT64 -> Decimal(9,2))              2              2           0        455.5           2.2       1.0X
+
+
+================================================================================================
+Rebase Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Rebase Updaters:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------
+IntegerWithRebaseUpdater (DATE legacy)                       0              0           0       3668.8           0.3       1.0X
+LongWithRebaseUpdater (TIMESTAMP_MICROS legacy)              0              0           0       2671.2           0.4       0.7X
+LongAsMicrosUpdater (TIMESTAMP_MILLIS)                       3              3           0        371.3           2.7       0.1X
+
+
+================================================================================================
+Unsigned Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Unsigned Updaters:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+UnsignedIntegerUpdater (UINT32 -> Long)                    0              0           0       6344.0           0.2       1.0X
+UnsignedLongUpdater (UINT64 -> Decimal(20,0))             18             18           0         59.3          16.9       0.0X
+
+
+================================================================================================
+Decimal Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Decimal Updaters:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+IntegerToDecimalUpdater                               0              0           0      10280.2           0.1       1.0X
+LongToDecimalUpdater                                  0              0           0       5153.3           0.2       0.5X
+FixedLenByteArrayToDecimalUpdater                    21             21           0         50.6          19.8       0.0X
+
+
+================================================================================================
+FixedLenByteArray Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+FixedLenByteArray Updaters:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+---------------------------------------------------------------------------------------------------------------------------------------
+FixedLenByteArrayUpdater (len=16 -> Binary)                         21             21           1         50.5          19.8       1.0X
+FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2))                7              7           0        152.6           6.6       3.0X
+FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4))              8              8           0        127.7           7.8       2.5X
+
+
diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt
new file mode 100644
index 0000000000000..2f76df57b9bf7
--- /dev/null
+++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt
@@ -0,0 +1,84 @@
+================================================================================================
+Identity Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Identity Updaters:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+BooleanUpdater                                        0              0           0      14625.7           0.1       1.0X
+ByteUpdater (INT32 -> Byte)                           0              0           0       3672.0           0.3       0.3X
+ShortUpdater (INT32 -> Short)                         1              1           0       2053.4           0.5       0.1X
+IntegerUpdater                                        0              0           0      10284.1           0.1       0.7X
+LongUpdater                                           0              0           0       5132.8           0.2       0.4X
+FloatUpdater                                          0              0           0      10257.9           0.1       0.7X
+DoubleUpdater                                         0              0           0       5097.0           0.2       0.3X
+BinaryUpdater                                        15             15           1         70.3          14.2       0.0X
+
+
+================================================================================================
+Type-converting Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Type-converting Updaters:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+---------------------------------------------------------------------------------------------------------------------------
+IntegerToLongUpdater                                     2              2           0        454.5           2.2       1.0X
+IntegerToDoubleUpdater                                   2              2           0        478.3           2.1       1.1X
+FloatToDoubleUpdater                                     2              2           0        480.2           2.1       1.1X
+DateToTimestampNTZUpdater                               36             36           0         29.5          33.9       0.1X
+DowncastLongUpdater (INT64 -> Decimal(9,2))              2              2           0        455.3           2.2       1.0X
+
+
+================================================================================================
+Rebase Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Rebase Updaters:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------
+IntegerWithRebaseUpdater (DATE legacy)                       0              0           0       2651.7           0.4       1.0X
+LongWithRebaseUpdater (TIMESTAMP_MICROS legacy)              0              1           0       2101.9           0.5       0.8X
+LongAsMicrosUpdater (TIMESTAMP_MILLIS)                       2              2           0        454.6           2.2       0.2X
+
+
+================================================================================================
+Unsigned Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Unsigned Updaters:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+UnsignedIntegerUpdater (UINT32 -> Long)                    1              1           0       1093.3           0.9       1.0X
+UnsignedLongUpdater (UINT64 -> Decimal(20,0))             18             18           0         59.1          16.9       0.1X
+
+
+================================================================================================
+Decimal Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Decimal Updaters:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+IntegerToDecimalUpdater                               0              0           0      10263.1           0.1       1.0X
+LongToDecimalUpdater                                  0              0           0       5133.0           0.2       0.5X
+FixedLenByteArrayToDecimalUpdater                    21             21           0         51.0          19.6       0.0X
+
+
+================================================================================================
+FixedLenByteArray Updaters
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+FixedLenByteArray Updaters:                              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+---------------------------------------------------------------------------------------------------------------------------------------
+FixedLenByteArrayUpdater (len=16 -> Binary)                         19             19           0         54.8          18.3       1.0X
+FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2))                7              7           0        160.2           6.2       2.9X
+FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4))              9              9           0        123.3           8.1       2.3X
+
+
diff --git a/sql/core/benchmarks/UnionBenchmark-jdk21-results.txt b/sql/core/benchmarks/UnionBenchmark-jdk21-results.txt
new file mode 100644
index 0000000000000..03b98247faf74
--- /dev/null
+++ b/sql/core/benchmarks/UnionBenchmark-jdk21-results.txt
@@ -0,0 +1,480 @@
+================================================================================================
+Union plain (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=2:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          59             89          20          0.3        2958.6       1.0X
+codegen=on                                           29             41          10          0.7        1439.9       2.1X
+
+
+================================================================================================
+Union plain (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=4:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          35             47          12          1.1         882.6       1.0X
+codegen=on                                           29             48          15          1.4         733.6       1.2X
+
+
+================================================================================================
+Union plain (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=8:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          49             61           9          1.6         612.4       1.0X
+codegen=on                                           44             55           8          1.8         553.2       1.1X
+
+
+================================================================================================
+Union plain (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=16:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          90            117          20          1.8         563.5       1.0X
+codegen=on                                           66             84          13          2.4         414.7       1.4X
+
+
+================================================================================================
+Union plain (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=32:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         178            203          19          1.8         556.6       1.0X
+codegen=on                                          132            143          10          2.4         413.3       1.3X
+
+
+================================================================================================
+Union plain (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=64:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1435           1437           4          0.4        2241.5       1.0X
+codegen=on                                          339            349          10          1.9         530.3       4.2X
+
+
+================================================================================================
+Union plain (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=128:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        2833           3008         248          0.5        2213.1       1.0X
+codegen=on                                          925            933           8          1.4         722.5       3.1X
+
+
+================================================================================================
+Union plain (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=256:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        6822           6844          32          0.4        2664.8       1.0X
+codegen=on                                         2915           2988         102          0.9        1138.8       2.3X
+
+
+================================================================================================
+Union plain (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=512:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       20404          20460          79          0.3        3985.2       1.0X
+codegen=on                                        10992          11066         104          0.5        2146.8       1.9X
+
+
+================================================================================================
+Union plain (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=1024:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       67396          67410          19          0.2        6581.7       1.0X
+codegen=on                                        43669          46828        4467          0.2        4264.5       1.5X
+
+
+================================================================================================
+Union type widening (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=2:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          15             20           5          1.4         734.4       1.0X
+codegen=on                                           13             17           4          1.5         645.2       1.1X
+
+
+================================================================================================
+Union type widening (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=4:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          22             27           6          1.8         561.4       1.0X
+codegen=on                                           20             22           2          2.0         496.4       1.1X
+
+
+================================================================================================
+Union type widening (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=8:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          39             42           3          2.0         488.6       1.0X
+codegen=on                                           34             36           2          2.4         420.6       1.2X
+
+
+================================================================================================
+Union type widening (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=16:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          79             84           5          2.0         494.5       1.0X
+codegen=on                                           65             70           4          2.5         405.5       1.2X
+
+
+================================================================================================
+Union type widening (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         180            186           5          1.8         561.2       1.0X
+codegen=on                                          143            148           4          2.2         445.6       1.3X
+
+
+================================================================================================
+Union type widening (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1319           1360          58          0.5        2061.0       1.0X
+codegen=on                                          368            374           4          1.7         574.6       3.6X
+
+
+================================================================================================
+Union type widening (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=128:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        2783           2890         152          0.5        2174.1       1.0X
+codegen=on                                         1055           1069          20          1.2         824.2       2.6X
+
+
+================================================================================================
+Union type widening (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=256:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        7484           7516          46          0.3        2923.4       1.0X
+codegen=on                                         3864           3925          86          0.7        1509.6       1.9X
+
+
+================================================================================================
+Union type widening (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=512:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       22172          22429         364          0.2        4330.4       1.0X
+codegen=on                                        13129          13554         601          0.4        2564.2       1.7X
+
+
+================================================================================================
+Union type widening (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=1024:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       75362          75783         595          0.1        7359.6       1.0X
+codegen=on                                        51240          53747        3544          0.2        5003.9       1.5X
+
+
+================================================================================================
+Union per-child ops (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=2:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          17             21           4          1.2         830.0       1.0X
+codegen=on                                           15             18           3          1.3         753.2       1.1X
+
+
+================================================================================================
+Union per-child ops (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=4:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          26             29           4          1.6         639.7       1.0X
+codegen=on                                           23             25           1          1.7         581.2       1.1X
+
+
+================================================================================================
+Union per-child ops (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=8:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          45             48           3          1.8         566.0       1.0X
+codegen=on                                           40             42           2          2.0         494.8       1.1X
+
+
+================================================================================================
+Union per-child ops (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=16:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          92             98           9          1.7         574.6       1.0X
+codegen=on                                           78             82           3          2.0         489.0       1.2X
+
+
+================================================================================================
+Union per-child ops (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         215            219           4          1.5         673.3       1.0X
+codegen=on                                          172            177           4          1.9         536.3       1.3X
+
+
+================================================================================================
+Union per-child ops (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1352           1379          37          0.5        2113.2       1.0X
+codegen=on                                          438            442           4          1.5         684.1       3.1X
+
+
+================================================================================================
+Union per-child ops (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=128:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        3785           3835          70          0.3        2957.3       1.0X
+codegen=on                                         1281           1297          24          1.0        1000.4       3.0X
+
+
+================================================================================================
+Union per-child ops (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=256:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        9790           9806          23          0.3        3824.2       1.0X
+codegen=on                                         4042           4201         225          0.6        1578.7       2.4X
+
+
+================================================================================================
+Union per-child ops (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=512:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       25662          26059         562          0.2        5012.1       1.0X
+codegen=on                                        14046          14380         472          0.4        2743.4       1.8X
+
+
+================================================================================================
+Union per-child ops (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=1024:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       81522          82208         970          0.1        7961.2       1.0X
+codegen=on                                        64561          67106        3600          0.2        6304.7       1.3X
+
+
+================================================================================================
+Union + downstream aggregate (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=2:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          68             89          21          0.3        3409.9       1.0X
+codegen=on                                           49             65          13          0.4        2430.2       1.4X
+
+
+================================================================================================
+Union + downstream aggregate (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=4:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          62             90          16          0.6        1559.9       1.0X
+codegen=on                                           64             96          19          0.6        1590.6       1.0X
+
+
+================================================================================================
+Union + downstream aggregate (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=8:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         129            169          19          0.6        1608.6       1.0X
+codegen=on                                           71             86          11          1.1         890.2       1.8X
+
+
+================================================================================================
+Union + downstream aggregate (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=16:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         147            207          27          1.1         920.8       1.0X
+codegen=on                                          129            144          12          1.2         804.3       1.1X
+
+
+================================================================================================
+Union + downstream aggregate (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=32:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         323            342          18          1.0        1010.8       1.0X
+codegen=on                                          292            325          29          1.1         913.7       1.1X
+
+
+================================================================================================
+Union + downstream aggregate (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=64:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1702           1868         235          0.4        2658.7       1.0X
+codegen=on                                          705            709           6          0.9        1100.8       2.4X
+
+
+================================================================================================
+Union + downstream aggregate (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=128:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        4076           4123          67          0.3        3184.3       1.0X
+codegen=on                                         2028           2139         157          0.6        1584.5       2.0X
+
+
+================================================================================================
+Union + downstream aggregate (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=256:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       11047          11073          36          0.2        4315.4       1.0X
+codegen=on                                         6465           6497          45          0.4        2525.4       1.7X
+
+
+================================================================================================
+Union + downstream aggregate (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=512:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       34298          34363          91          0.1        6698.9       1.0X
+codegen=on                                        23836          25124        1822          0.2        4655.5       1.4X
+
+
+================================================================================================
+Union + downstream aggregate (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=1024:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                      116468         117298        1174          0.1       11373.8       1.0X
+codegen=on                                        93686          97399        5251          0.1        9149.0       1.2X
+
+
diff --git a/sql/core/benchmarks/UnionBenchmark-jdk25-results.txt b/sql/core/benchmarks/UnionBenchmark-jdk25-results.txt
new file mode 100644
index 0000000000000..8645acf797af7
--- /dev/null
+++ b/sql/core/benchmarks/UnionBenchmark-jdk25-results.txt
@@ -0,0 +1,480 @@
+================================================================================================
+Union plain (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=2:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          49             75          19          0.4        2459.8       1.0X
+codegen=on                                           23             33           6          0.9        1146.2       2.1X
+
+
+================================================================================================
+Union plain (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=4:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          31             38           4          1.3         767.1       1.0X
+codegen=on                                           25             33           4          1.6         636.0       1.2X
+
+
+================================================================================================
+Union plain (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=8:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          44             59           7          1.8         554.0       1.0X
+codegen=on                                           35             43           5          2.3         443.6       1.2X
+
+
+================================================================================================
+Union plain (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=16:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          81            107          22          2.0         505.0       1.0X
+codegen=on                                           59             67           7          2.7         366.3       1.4X
+
+
+================================================================================================
+Union plain (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=32:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         160            185          21          2.0         500.2       1.0X
+codegen=on                                          117            127           8          2.7         364.2       1.4X
+
+
+================================================================================================
+Union plain (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=64:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1114           1168          76          0.6        1740.9       1.0X
+codegen=on                                          297            309           9          2.2         464.5       3.7X
+
+
+================================================================================================
+Union plain (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=128:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        2452           2509          80          0.5        1915.8       1.0X
+codegen=on                                          858            861           4          1.5         670.5       2.9X
+
+
+================================================================================================
+Union plain (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=256:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        6293           6417         176          0.4        2458.0       1.0X
+codegen=on                                         2721           2785          91          0.9        1062.9       2.3X
+
+
+================================================================================================
+Union plain (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=512:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       18990          19112         172          0.3        3709.0       1.0X
+codegen=on                                        10145          10223         110          0.5        1981.4       1.9X
+
+
+================================================================================================
+Union plain (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=1024:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       63438          63586         209          0.2        6195.1       1.0X
+codegen=on                                        40631          42964        3299          0.3        3967.8       1.6X
+
+
+================================================================================================
+Union type widening (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=2:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          13             18           5          1.6         641.8       1.0X
+codegen=on                                           11             15           4          1.7         574.0       1.1X
+
+
+================================================================================================
+Union type widening (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=4:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          20             24           5          2.0         498.0       1.0X
+codegen=on                                           18             25           9          2.3         441.9       1.1X
+
+
+================================================================================================
+Union type widening (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=8:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          36             43           7          2.2         449.4       1.0X
+codegen=on                                           31             38           7          2.6         387.3       1.2X
+
+
+================================================================================================
+Union type widening (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=16:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          75             88          13          2.1         466.0       1.0X
+codegen=on                                           60             63           3          2.7         373.1       1.2X
+
+
+================================================================================================
+Union type widening (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         170            174           3          1.9         530.6       1.0X
+codegen=on                                          131            137           4          2.4         410.4       1.3X
+
+
+================================================================================================
+Union type widening (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1065           1089          35          0.6        1664.1       1.0X
+codegen=on                                          341            359          16          1.9         532.1       3.1X
+
+
+================================================================================================
+Union type widening (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=128:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        2592           2594           3          0.5        2025.0       1.0X
+codegen=on                                          970            986          19          1.3         758.1       2.7X
+
+
+================================================================================================
+Union type widening (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=256:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        6833           6855          31          0.4        2669.1       1.0X
+codegen=on                                         3216           3252          51          0.8        1256.1       2.1X
+
+
+================================================================================================
+Union type widening (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=512:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       21236          21249          19          0.2        4147.6       1.0X
+codegen=on                                        11840          12074         331          0.4        2312.5       1.8X
+
+
+================================================================================================
+Union type widening (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=1024:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       70308          70318          14          0.1        6866.0       1.0X
+codegen=on                                        52645          53667        1444          0.2        5141.2       1.3X
+
+
+================================================================================================
+Union per-child ops (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=2:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          15             20           4          1.3         759.3       1.0X
+codegen=on                                           14             18           4          1.4         715.4       1.1X
+
+
+================================================================================================
+Union per-child ops (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=4:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          24             27           4          1.7         598.9       1.0X
+codegen=on                                           22             26           6          1.9         538.7       1.1X
+
+
+================================================================================================
+Union per-child ops (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=8:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          44             57          10          1.8         548.1       1.0X
+codegen=on                                           39             47           9          2.1         486.7       1.1X
+
+
+================================================================================================
+Union per-child ops (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=16:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          88             96           6          1.8         548.9       1.0X
+codegen=on                                           74             93          13          2.2         460.5       1.2X
+
+
+================================================================================================
+Union per-child ops (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         196            202           6          1.6         612.5       1.0X
+codegen=on                                          157            164           4          2.0         489.2       1.3X
+
+
+================================================================================================
+Union per-child ops (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1395           1419          34          0.5        2180.0       1.0X
+codegen=on                                          412            419           9          1.6         644.1       3.4X
+
+
+================================================================================================
+Union per-child ops (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=128:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        3736           3765          41          0.3        2918.7       1.0X
+codegen=on                                         1182           1216          48          1.1         923.4       3.2X
+
+
+================================================================================================
+Union per-child ops (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=256:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        9338           9396          81          0.3        3647.7       1.0X
+codegen=on                                         3651           3715          91          0.7        1426.1       2.6X
+
+
+================================================================================================
+Union per-child ops (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=512:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       25446          25691         347          0.2        4970.0       1.0X
+codegen=on                                        13850          13972         173          0.4        2705.0       1.8X
+
+
+================================================================================================
+Union per-child ops (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=1024:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       78002          78303         426          0.1        7617.4       1.0X
+codegen=on                                        52726          55567        4018          0.2        5149.0       1.5X
+
+
+================================================================================================
+Union + downstream aggregate (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=2:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          51             68          18          0.4        2545.9       1.0X
+codegen=on                                           33             45           8          0.6        1637.8       1.6X
+
+
+================================================================================================
+Union + downstream aggregate (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=4:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          49             67          11          0.8        1225.5       1.0X
+codegen=on                                           40             48           8          1.0        1001.2       1.2X
+
+
+================================================================================================
+Union + downstream aggregate (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=8:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          69             77           8          1.2         856.9       1.0X
+codegen=on                                           60             71          10          1.3         748.0       1.1X
+
+
+================================================================================================
+Union + downstream aggregate (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=16:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         130            143           8          1.2         812.4       1.0X
+codegen=on                                          113            131          24          1.4         708.6       1.1X
+
+
+================================================================================================
+Union + downstream aggregate (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=32:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         285            322          25          1.1         890.5       1.0X
+codegen=on                                          313            342          22          1.0         976.6       0.9X
+
+
+================================================================================================
+Union + downstream aggregate (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=64:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1507           1522          21          0.4        2354.0       1.0X
+codegen=on                                          647            694          70          1.0        1010.3       2.3X
+
+
+================================================================================================
+Union + downstream aggregate (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=128:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        3642           3739         137          0.4        2845.5       1.0X
+codegen=on                                         1902           1991         126          0.7        1486.0       1.9X
+
+
+================================================================================================
+Union + downstream aggregate (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=256:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       10067          10091          34          0.3        3932.4       1.0X
+codegen=on                                         6604           6708         146          0.4        2579.7       1.5X
+
+
+================================================================================================
+Union + downstream aggregate (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=512:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       31875          31913          54          0.2        6225.5       1.0X
+codegen=on                                        21053          21376         458          0.2        4111.9       1.5X
+
+
+================================================================================================
+Union + downstream aggregate (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=1024:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                      109873         110819        1339          0.1       10729.7       1.0X
+codegen=on                                        82461          84408        2753          0.1        8052.8       1.3X
+
+
diff --git a/sql/core/benchmarks/UnionBenchmark-results.txt b/sql/core/benchmarks/UnionBenchmark-results.txt
new file mode 100644
index 0000000000000..2e956534bf17c
--- /dev/null
+++ b/sql/core/benchmarks/UnionBenchmark-results.txt
@@ -0,0 +1,480 @@
+================================================================================================
+Union plain (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=2:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          52             74          17          0.4        2598.0       1.0X
+codegen=on                                           26             38           6          0.8        1313.5       2.0X
+
+
+================================================================================================
+Union plain (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=4:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          32             42           5          1.2         809.5       1.0X
+codegen=on                                           31             41           5          1.3         776.9       1.0X
+
+
+================================================================================================
+Union plain (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=8:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          54             63           6          1.5         672.4       1.0X
+codegen=on                                           42             54           7          1.9         522.9       1.3X
+
+
+================================================================================================
+Union plain (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=16:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          93            112          15          1.7         583.9       1.0X
+codegen=on                                           71             88          12          2.2         445.8       1.3X
+
+
+================================================================================================
+Union plain (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=32:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         183            205          19          1.7         572.1       1.0X
+codegen=on                                          146            156           9          2.2         455.8       1.3X
+
+
+================================================================================================
+Union plain (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=64:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1226           1291          93          0.5        1915.4       1.0X
+codegen=on                                          364            384          22          1.8         568.3       3.4X
+
+
+================================================================================================
+Union plain (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=128:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        2841           2941         141          0.5        2219.9       1.0X
+codegen=on                                         1035           1039           5          1.2         808.6       2.7X
+
+
+================================================================================================
+Union plain (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=256:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        7282           7372         126          0.4        2844.7       1.0X
+codegen=on                                         3490           3560          99          0.7        1363.1       2.1X
+
+
+================================================================================================
+Union plain (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=512:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       21272          21287          21          0.2        4154.8       1.0X
+codegen=on                                        12509          12760         355          0.4        2443.1       1.7X
+
+
+================================================================================================
+Union plain (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union plain, N=1024:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       71083          71242         225          0.1        6941.7       1.0X
+codegen=on                                        50694          52393        2403          0.2        4950.6       1.4X
+
+
+================================================================================================
+Union type widening (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=2:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          16             20           3          1.2         820.7       1.0X
+codegen=on                                           15             17           2          1.3         745.6       1.1X
+
+
+================================================================================================
+Union type widening (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=4:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          23             26           3          1.7         582.0       1.0X
+codegen=on                                           22             24           3          1.8         543.2       1.1X
+
+
+================================================================================================
+Union type widening (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=8:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          42             46           3          1.9         522.5       1.0X
+codegen=on                                           36             39           2          2.2         450.5       1.2X
+
+
+================================================================================================
+Union type widening (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=16:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          84             90           6          1.9         527.2       1.0X
+codegen=on                                           71             75           4          2.3         442.3       1.2X
+
+
+================================================================================================
+Union type widening (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         194            202           9          1.7         605.1       1.0X
+codegen=on                                          157            162           4          2.0         490.4       1.2X
+
+
+================================================================================================
+Union type widening (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1230           1233           4          0.5        1922.0       1.0X
+codegen=on                                          407            412           4          1.6         635.2       3.0X
+
+
+================================================================================================
+Union type widening (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=128:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        2936           2944          11          0.4        2294.0       1.0X
+codegen=on                                         1204           1218          20          1.1         940.6       2.4X
+
+
+================================================================================================
+Union type widening (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=256:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        7922           7955          47          0.3        3094.5       1.0X
+codegen=on                                         4070           4106          51          0.6        1589.8       1.9X
+
+
+================================================================================================
+Union type widening (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=512:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       23551          23582          45          0.2        4599.7       1.0X
+codegen=on                                        14854          15112         364          0.3        2901.3       1.6X
+
+
+================================================================================================
+Union type widening (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union type widening, N=1024:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       77463          77935         668          0.1        7564.8       1.0X
+codegen=on                                        59960          61705        2468          0.2        5855.4       1.3X
+
+
+================================================================================================
+Union per-child ops (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=2:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          17             21           4          1.2         831.1       1.0X
+codegen=on                                           15             18           2          1.3         772.4       1.1X
+
+
+================================================================================================
+Union per-child ops (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=4:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          27             29           3          1.5         664.0       1.0X
+codegen=on                                           24             26           2          1.7         597.4       1.1X
+
+
+================================================================================================
+Union per-child ops (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=8:                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          48             51           2          1.7         604.3       1.0X
+codegen=on                                           43             45           3          1.9         532.5       1.1X
+
+
+================================================================================================
+Union per-child ops (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=16:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          97            102           4          1.6         608.7       1.0X
+codegen=on                                           83             87           3          1.9         521.8       1.2X
+
+
+================================================================================================
+Union per-child ops (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         225            233           6          1.4         703.2       1.0X
+codegen=on                                          188            194           5          1.7         587.2       1.2X
+
+
+================================================================================================
+Union per-child ops (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1583           1648          92          0.4        2474.0       1.0X
+codegen=on                                          483            487           3          1.3         755.2       3.3X
+
+
+================================================================================================
+Union per-child ops (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=128:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        3933           3941          11          0.3        3072.8       1.0X
+codegen=on                                         1399           1425          37          0.9        1093.0       2.8X
+
+
+================================================================================================
+Union per-child ops (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=256:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        9835           9873          53          0.3        3841.9       1.0X
+codegen=on                                         4583           4650          95          0.6        1790.3       2.1X
+
+
+================================================================================================
+Union per-child ops (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=512:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       26998          27179         256          0.2        5273.1       1.0X
+codegen=on                                        16455          16719         373          0.3        3213.9       1.6X
+
+
+================================================================================================
+Union per-child ops (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union per-child ops, N=1024:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       88874          88916          60          0.1        8679.1       1.0X
+codegen=on                                        65006          67695        3803          0.2        6348.2       1.4X
+
+
+================================================================================================
+Union + downstream aggregate (N=2)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=2:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          54             65           6          0.4        2708.4       1.0X
+codegen=on                                           36             45           6          0.6        1776.2       1.5X
+
+
+================================================================================================
+Union + downstream aggregate (N=4)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=4:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          60             76           9          0.7        1511.7       1.0X
+codegen=on                                           48             57           7          0.8        1196.3       1.3X
+
+
+================================================================================================
+Union + downstream aggregate (N=8)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=8:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                          80             94          12          1.0         995.3       1.0X
+codegen=on                                           73             83           7          1.1         912.3       1.1X
+
+
+================================================================================================
+Union + downstream aggregate (N=16)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=16:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         151            173          18          1.1         942.2       1.0X
+codegen=on                                          136            148          13          1.2         852.9       1.1X
+
+
+================================================================================================
+Union + downstream aggregate (N=32)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=32:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                         389            401          15          0.8        1215.3       1.0X
+codegen=on                                          375            414          36          0.9        1170.7       1.0X
+
+
+================================================================================================
+Union + downstream aggregate (N=64)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=64:       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        1662           1667           6          0.4        2597.6       1.0X
+codegen=on                                          771            786          20          0.8        1205.0       2.2X
+
+
+================================================================================================
+Union + downstream aggregate (N=128)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=128:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                        4128           4158          42          0.3        3225.3       1.0X
+codegen=on                                         2244           2247           5          0.6        1752.7       1.8X
+
+
+================================================================================================
+Union + downstream aggregate (N=256)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=256:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       11367          11549         257          0.2        4440.4       1.0X
+codegen=on                                         7226           7719         697          0.4        2822.5       1.6X
+
+
+================================================================================================
+Union + downstream aggregate (N=512)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=512:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                       35591          35784         273          0.1        6951.3       1.0X
+codegen=on                                        26016          26668         922          0.2        5081.3       1.4X
+
+
+================================================================================================
+Union + downstream aggregate (N=1024)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Union + downstream aggregate, N=1024:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+codegen=off                                      121937         122855        1299          0.1       11907.9       1.0X
+codegen=on                                       105631         109213        5066          0.1       10315.5       1.2X
+
+
diff --git a/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-jdk21-results.txt b/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-jdk21-results.txt
new file mode 100644
index 0000000000000..a7199750be601
--- /dev/null
+++ b/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-jdk21-results.txt
@@ -0,0 +1,93 @@
+================================================================================================
+DELTA_BINARY_PACKED INT32
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BINARY_PACKED INT32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readIntegers, constant                                2              2           0        451.8           2.2       1.0X
+skipIntegers, constant                                3              3           0        415.1           2.4       0.9X
+readIntegers, monotonic                               3              3           0        369.7           2.7       0.8X
+skipIntegers, monotonic                               3              3           0        415.6           2.4       0.9X
+readIntegers, small-delta random                      3              3           0        308.6           3.2       0.7X
+skipIntegers, small-delta random                      3              3           0        358.6           2.8       0.8X
+readIntegers, wide random                             4              4           0        249.2           4.0       0.6X
+skipIntegers, wide random                             4              4           0        281.4           3.6       0.6X
+
+
+================================================================================================
+DELTA_BINARY_PACKED INT64
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BINARY_PACKED INT64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readLongs, constant                                   5              6           0        195.2           5.1       1.0X
+skipLongs, constant                                   6              6           0        175.4           5.7       0.9X
+readLongs, monotonic                                  7              7           0        158.8           6.3       0.8X
+skipLongs, monotonic                                  6              6           0        175.6           5.7       0.9X
+readLongs, small-delta random                         8              8           0        139.4           7.2       0.7X
+skipLongs, small-delta random                         7              7           0        152.5           6.6       0.8X
+readLongs, wide random                               10             10           1        102.9           9.7       0.5X
+skipLongs, wide random                                6              9           2        185.5           5.4       1.0X
+
+
+================================================================================================
+DELTA_BYTE_ARRAY
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BYTE_ARRAY:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, no overlap, len=16                       35             41           4         29.6          33.8       1.0X
+skipBinary, no overlap, len=16                       41             45           3         25.9          38.6       0.9X
+readBinary, half overlap, len=16                     41             44           3         25.4          39.3       0.9X
+skipBinary, half overlap, len=16                     47             50           5         22.3          44.8       0.8X
+readBinary, full overlap, len=16                     42             44           3         25.1          39.9       0.8X
+skipBinary, full overlap, len=16                     48             50           3         22.0          45.6       0.7X
+readBinary, half overlap, len=64                     42             42           1         25.2          39.6       0.9X
+skipBinary, half overlap, len=64                     47             50          14         22.4          44.6       0.8X
+
+
+================================================================================================
+DELTA_LENGTH_BYTE_ARRAY
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_LENGTH_BYTE_ARRAY:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, payloadLen=8                             20             21           0         51.7          19.3       1.0X
+skipBinary, payloadLen=8                             10             10           0        106.3           9.4       2.1X
+readBinary, payloadLen=32                            16             18           2         63.9          15.7       1.2X
+skipBinary, payloadLen=32                             6              6           0        175.9           5.7       3.4X
+readBinary, payloadLen=128                           19             19           0         56.0          17.8       1.1X
+skipBinary, payloadLen=128                            6              6           0        176.3           5.7       3.4X
+readBinary, payloadLen=512                           41             43           3         25.6          39.0       0.5X
+skipBinary, payloadLen=512                            6              6           1        176.4           5.7       3.4X
+
+
+================================================================================================
+Variant reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+Variant reads:                                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------
+readBytes (INT32)                                            7              7           0        142.3           7.0       1.0X
+readShorts (INT32)                                           7              8           0        143.0           7.0       1.0X
+readUnsignedIntegers (INT32 -> Long)                         7              7           0        147.1           6.8       1.0X
+readUnsignedLongs (INT64 -> Decimal(20,0))                 232            243          23          4.5         221.5       0.0X
+skipBytes                                                    4              4           0        285.4           3.5       2.0X
+skipShorts                                                   4              4           0        285.4           3.5       2.0X
+readByte (INT32 single-value)                               13             13           1         80.2          12.5       0.6X
+readShort (INT32 single-value)                              13             13           1         82.0          12.2       0.6X
+readInteger (INT32 single-value)                            13             13           0         80.2          12.5       0.6X
+readLong (INT64 single-value)                               14             14           0         74.9          13.3       0.5X
+readBinary(len) (DELTA_BYTE_ARRAY single-value)             64             67           3         16.5          60.6       0.1X
+
+
diff --git a/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-jdk25-results.txt b/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-jdk25-results.txt
new file mode 100644
index 0000000000000..fe7b64c3834b3
--- /dev/null
+++ b/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-jdk25-results.txt
@@ -0,0 +1,93 @@
+================================================================================================
+DELTA_BINARY_PACKED INT32
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BINARY_PACKED INT32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readIntegers, constant                                2              2           0        487.1           2.1       1.0X
+skipIntegers, constant                                3              3           0        364.8           2.7       0.7X
+readIntegers, monotonic                               3              4           0        303.1           3.3       0.6X
+skipIntegers, monotonic                               3              3           0        365.3           2.7       0.7X
+readIntegers, small-delta random                      4              4           0        241.3           4.1       0.5X
+skipIntegers, small-delta random                      4              4           0        278.7           3.6       0.6X
+readIntegers, wide random                             5              5           0        201.4           5.0       0.4X
+skipIntegers, wide random                             5              5           0        225.6           4.4       0.5X
+
+
+================================================================================================
+DELTA_BINARY_PACKED INT64
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BINARY_PACKED INT64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readLongs, constant                                   6              7           1        163.0           6.1       1.0X
+skipLongs, constant                                   7              7           0        152.1           6.6       0.9X
+readLongs, monotonic                                  8              8           1        139.1           7.2       0.9X
+skipLongs, monotonic                                  7              7           0        152.0           6.6       0.9X
+readLongs, small-delta random                         9              9           0        122.7           8.2       0.8X
+skipLongs, small-delta random                         8              8           0        133.9           7.5       0.8X
+readLongs, wide random                               11             11           0         94.4          10.6       0.6X
+skipLongs, wide random                               10             10           0        100.9           9.9       0.6X
+
+
+================================================================================================
+DELTA_BYTE_ARRAY
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BYTE_ARRAY:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, no overlap, len=16                       37             38           2         28.2          35.5       1.0X
+skipBinary, no overlap, len=16                       42             43           2         25.2          39.7       0.9X
+readBinary, half overlap, len=16                     43             44           2         24.3          41.2       0.9X
+skipBinary, half overlap, len=16                     48             49           2         21.8          45.9       0.8X
+readBinary, full overlap, len=16                     44             47           4         24.0          41.7       0.9X
+skipBinary, full overlap, len=16                     48             50           2         21.6          46.3       0.8X
+readBinary, half overlap, len=64                     44             45           2         23.8          42.0       0.8X
+skipBinary, half overlap, len=64                     49             53           4         21.6          46.3       0.8X
+
+
+================================================================================================
+DELTA_LENGTH_BYTE_ARRAY
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_LENGTH_BYTE_ARRAY:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, payloadLen=8                             22             22           0         48.7          20.5       1.0X
+skipBinary, payloadLen=8                             11             11           0         97.0          10.3       2.0X
+readBinary, payloadLen=32                            21             22           0         48.9          20.5       1.0X
+skipBinary, payloadLen=32                            11             11           0         97.0          10.3       2.0X
+readBinary, payloadLen=128                           24             24           1         43.7          22.9       0.9X
+skipBinary, payloadLen=128                           11             11           1         97.1          10.3       2.0X
+readBinary, payloadLen=512                           51             53           1         20.5          48.7       0.4X
+skipBinary, payloadLen=512                           11             11           0         97.1          10.3       2.0X
+
+
+================================================================================================
+Variant reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Variant reads:                                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------
+readBytes (INT32)                                            9             10           1        115.6           8.6       1.0X
+readShorts (INT32)                                           9             10           1        116.0           8.6       1.0X
+readUnsignedIntegers (INT32 -> Long)                         9              9           1        117.1           8.5       1.0X
+readUnsignedLongs (INT64 -> Decimal(20,0))                 211            223          27          5.0         201.5       0.0X
+skipBytes                                                    9              9           0        122.2           8.2       1.1X
+skipShorts                                                   9              9           0        122.3           8.2       1.1X
+readByte (INT32 single-value)                               13             13           0         80.1          12.5       0.7X
+readShort (INT32 single-value)                              14             15           1         72.7          13.7       0.6X
+readInteger (INT32 single-value)                            14             15           0         72.9          13.7       0.6X
+readLong (INT64 single-value)                               16             17           1         64.3          15.6       0.6X
+readBinary(len) (DELTA_BYTE_ARRAY single-value)             79             81           2         13.3          74.9       0.1X
+
+
diff --git a/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-results.txt b/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-results.txt
new file mode 100644
index 0000000000000..9ff1678901e9f
--- /dev/null
+++ b/sql/core/benchmarks/VectorizedDeltaReaderBenchmark-results.txt
@@ -0,0 +1,93 @@
+================================================================================================
+DELTA_BINARY_PACKED INT32
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BINARY_PACKED INT32:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readIntegers, constant                                2              2           0        482.3           2.1       1.0X
+skipIntegers, constant                                3              3           0        335.4           3.0       0.7X
+readIntegers, monotonic                               3              3           0        314.9           3.2       0.7X
+skipIntegers, monotonic                               3              3           0        335.1           3.0       0.7X
+readIntegers, small-delta random                      4              4           0        257.5           3.9       0.5X
+skipIntegers, small-delta random                      4              4           0        269.5           3.7       0.6X
+readIntegers, wide random                             5              5           0        209.3           4.8       0.4X
+skipIntegers, wide random                             5              5           0        218.2           4.6       0.5X
+
+
+================================================================================================
+DELTA_BINARY_PACKED INT64
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BINARY_PACKED INT64:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readLongs, constant                                   5              6           1        191.0           5.2       1.0X
+skipLongs, constant                                   6              6           0        170.1           5.9       0.9X
+readLongs, monotonic                                  7              7           0        144.9           6.9       0.8X
+skipLongs, monotonic                                  6              6           0        170.1           5.9       0.9X
+readLongs, small-delta random                         8              8           0        134.8           7.4       0.7X
+skipLongs, small-delta random                         7              7           0        152.8           6.5       0.8X
+readLongs, wide random                               11             11           0         97.8          10.2       0.5X
+skipLongs, wide random                               10             10           0        106.4           9.4       0.6X
+
+
+================================================================================================
+DELTA_BYTE_ARRAY
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_BYTE_ARRAY:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, no overlap, len=16                       36             37           1         29.2          34.2       1.0X
+skipBinary, no overlap, len=16                       41             41           1         25.8          38.7       0.9X
+readBinary, half overlap, len=16                     42             43           1         25.0          39.9       0.9X
+skipBinary, half overlap, len=16                     48             50           2         21.6          46.2       0.7X
+readBinary, full overlap, len=16                     42             44           2         24.8          40.3       0.8X
+skipBinary, full overlap, len=16                     48             49           1         21.8          45.9       0.7X
+readBinary, half overlap, len=64                     42             44           1         24.8          40.4       0.8X
+skipBinary, half overlap, len=64                     47             49           1         22.2          45.1       0.8X
+
+
+================================================================================================
+DELTA_LENGTH_BYTE_ARRAY
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+DELTA_LENGTH_BYTE_ARRAY:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, payloadLen=8                             22             25           2         48.1          20.8       1.0X
+skipBinary, payloadLen=8                             11             12           1         95.8          10.4       2.0X
+readBinary, payloadLen=32                            22             26           3         48.0          20.9       1.0X
+skipBinary, payloadLen=32                            11             12           1         97.6          10.2       2.0X
+readBinary, payloadLen=128                           26             29           2         40.0          25.0       0.8X
+skipBinary, payloadLen=128                           11             12           1         96.0          10.4       2.0X
+readBinary, payloadLen=512                           54             62           4         19.3          51.9       0.4X
+skipBinary, payloadLen=512                           11             13           2         95.7          10.4       2.0X
+
+
+================================================================================================
+Variant reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+Variant reads:                                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------
+readBytes (INT32)                                            7              7           0        145.8           6.9       1.0X
+readShorts (INT32)                                           7              8           1        140.7           7.1       1.0X
+readUnsignedIntegers (INT32 -> Long)                         7              7           0        143.3           7.0       1.0X
+readUnsignedLongs (INT64 -> Decimal(20,0))                 227            239          25          4.6         216.4       0.0X
+skipBytes                                                    8              8           0        136.8           7.3       0.9X
+skipShorts                                                   8              8           0        136.6           7.3       0.9X
+readByte (INT32 single-value)                               12             12           1         85.8          11.7       0.6X
+readShort (INT32 single-value)                              12             12           0         85.9          11.6       0.6X
+readInteger (INT32 single-value)                            12             12           0         86.1          11.6       0.6X
+readLong (INT64 single-value)                               14             15           1         73.4          13.6       0.5X
+readBinary(len) (DELTA_BYTE_ARRAY single-value)             71             77           5         14.7          68.2       0.1X
+
+
diff --git a/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-jdk21-results.txt b/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-jdk21-results.txt
new file mode 100644
index 0000000000000..e3d8c57003939
--- /dev/null
+++ b/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-jdk21-results.txt
@@ -0,0 +1,86 @@
+================================================================================================
+Fixed-size bulk reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Fixed-size bulk reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBooleans                                          0              0           0      20570.4           0.0       1.0X
+readBytes                                             0              0           0       3671.3           0.3       0.2X
+readShorts                                            0              0           0       3317.6           0.3       0.2X
+readIntegers                                          0              0           0       7683.8           0.1       0.4X
+readLongs                                             0              0           0       3812.5           0.3       0.2X
+readFloats                                            0              0           0      10254.8           0.1       0.5X
+readDoubles                                           0              0           0       5122.9           0.2       0.2X
+
+
+================================================================================================
+Conversion bulk reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Conversion bulk reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readUnsignedIntegers                                  0              0           0       5940.6           0.2       1.0X
+readUnsignedLongs                                    17             17           0         61.3          16.3       0.0X
+readIntegersWithRebase, no rebase needed              0              0           0       3276.6           0.3       0.6X
+readLongsWithRebase, no rebase needed                 0              0           0       2279.6           0.4       0.4X
+
+
+================================================================================================
+Variable-length reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Variable-length reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, payloadLen=8                             14             14           1         74.0          13.5       1.0X
+readBinary, payloadLen=32                            15             15           1         71.2          14.0       1.0X
+readBinary, payloadLen=128                           17             18           1         60.5          16.5       0.8X
+readBinary, payloadLen=512                           37             38           0         28.3          35.3       0.4X
+
+
+================================================================================================
+Single-value reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Single-value reads:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBoolean                                           0              0           0       2342.8           0.4       1.0X
+readByte                                              0              0           0       2681.3           0.4       1.1X
+readShort                                             0              0           0       2681.3           0.4       1.1X
+readInteger                                           0              0           0       2681.3           0.4       1.1X
+readLong                                              0              0           0       2681.3           0.4       1.1X
+readFloat                                             0              0           0       2681.3           0.4       1.1X
+readDouble                                            0              0           0       2683.1           0.4       1.1X
+
+
+================================================================================================
+Skip
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Skip:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+skipBinary, payloadLen=8                             12             12           1         90.4          11.1       1.0X
+skipBinary, payloadLen=32                            12             12           1         90.3          11.1       1.0X
+skipBinary, payloadLen=128                           12             13           1         85.5          11.7       0.9X
+skipBinary, payloadLen=512                           12             13           1         85.1          11.7       0.9X
+skipFixedLenByteArray, len=4                          0              0           0   36157793.1           0.0  399858.2X
+skipFixedLenByteArray, len=16                         0              0           0   36157793.1           0.0  399858.2X
+skipFixedLenByteArray, len=64                         0              0           0   36157793.1           0.0  399858.2X
+skipBooleans                                          0              0           0   36157793.1           0.0  399858.2X
+skipBytes                                             0              0           0   36157793.1           0.0  399858.2X
+skipShorts                                            0              0           0   36157793.1           0.0  399858.2X
+skipIntegers                                          0              0           0   36157793.1           0.0  399858.2X
+skipLongs                                             0              0           0   36157793.1           0.0  399858.2X
+skipFloats                                            0              0           0   36157793.1           0.0  399858.2X
+skipDoubles                                           0              0           0   36157793.1           0.0  399858.2X
+
+
diff --git a/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-jdk25-results.txt b/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-jdk25-results.txt
new file mode 100644
index 0000000000000..6af1f23a9efa8
--- /dev/null
+++ b/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-jdk25-results.txt
@@ -0,0 +1,86 @@
+================================================================================================
+Fixed-size bulk reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Fixed-size bulk reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBooleans                                          0              0           0      20232.6           0.0       1.0X
+readBytes                                             0              0           0       3747.3           0.3       0.2X
+readShorts                                            0              1           0       3321.0           0.3       0.2X
+readIntegers                                          0              0           0       7767.7           0.1       0.4X
+readLongs                                             0              0           0       3882.9           0.3       0.2X
+readFloats                                            0              0           0      10246.0           0.1       0.5X
+readDoubles                                           0              0           0       3881.0           0.3       0.2X
+
+
+================================================================================================
+Conversion bulk reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Conversion bulk reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readUnsignedIntegers                                  0              0           0       5093.5           0.2       1.0X
+readUnsignedLongs                                    18             19           1         59.2          16.9       0.0X
+readIntegersWithRebase, no rebase needed              0              0           0       3282.5           0.3       0.6X
+readLongsWithRebase, no rebase needed                 0              0           0       2287.3           0.4       0.4X
+
+
+================================================================================================
+Variable-length reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Variable-length reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, payloadLen=8                             15             16           0         68.4          14.6       1.0X
+readBinary, payloadLen=32                            16             16           0         66.2          15.1       1.0X
+readBinary, payloadLen=128                           18             18           0         58.5          17.1       0.9X
+readBinary, payloadLen=512                           37             38           1         28.1          35.6       0.4X
+
+
+================================================================================================
+Single-value reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Single-value reads:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBoolean                                           0              0           0       2453.6           0.4       1.0X
+readByte                                              0              0           0       2659.6           0.4       1.1X
+readShort                                             0              0           0       2659.6           0.4       1.1X
+readInteger                                           0              0           0       2659.6           0.4       1.1X
+readLong                                              0              0           0       2659.6           0.4       1.1X
+readFloat                                             0              0           0       2659.7           0.4       1.1X
+readDouble                                            0              0           0       2659.6           0.4       1.1X
+
+
+================================================================================================
+Skip
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Skip:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+skipBinary, payloadLen=8                              6              6           0        171.0           5.8       1.0X
+skipBinary, payloadLen=32                             6              6           0        169.5           5.9       1.0X
+skipBinary, payloadLen=128                            9              9           0        115.7           8.6       0.7X
+skipBinary, payloadLen=512                            9              9           0        117.1           8.5       0.7X
+skipFixedLenByteArray, len=4                          0              0           0   36157793.1           0.0  211494.7X
+skipFixedLenByteArray, len=16                         0              0           0   36157793.1           0.0  211494.7X
+skipFixedLenByteArray, len=64                         0              0           0   36157793.1           0.0  211494.7X
+skipBooleans                                          0              0           0   36157793.1           0.0  211494.7X
+skipBytes                                             0              0           0   36157793.1           0.0  211494.7X
+skipShorts                                            0              0           0   36157793.1           0.0  211494.7X
+skipIntegers                                          0              0           0   36157793.1           0.0  211494.7X
+skipLongs                                             0              0           0   36157793.1           0.0  211494.7X
+skipFloats                                            0              0           0   36157793.1           0.0  211494.7X
+skipDoubles                                           0              0           0   36157793.1           0.0  211494.7X
+
+
diff --git a/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-results.txt b/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-results.txt
new file mode 100644
index 0000000000000..729e5c714094c
--- /dev/null
+++ b/sql/core/benchmarks/VectorizedPlainValuesReaderBenchmark-results.txt
@@ -0,0 +1,86 @@
+================================================================================================
+Fixed-size bulk reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Fixed-size bulk reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBooleans                                          0              0           0      20546.2           0.0       1.0X
+readBytes                                             0              0           0       3654.2           0.3       0.2X
+readShorts                                            0              1           0       2154.3           0.5       0.1X
+readIntegers                                          0              0           0      10214.0           0.1       0.5X
+readLongs                                             0              0           0       5137.3           0.2       0.3X
+readFloats                                            0              0           0      10239.9           0.1       0.5X
+readDoubles                                           0              0           0       5135.5           0.2       0.2X
+
+
+================================================================================================
+Conversion bulk reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Conversion bulk reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readUnsignedIntegers                                  1              1           0       1093.8           0.9       1.0X
+readUnsignedLongs                                    17             17           0         61.2          16.3       0.1X
+readIntegersWithRebase, no rebase needed              0              0           0       2675.3           0.4       2.4X
+readLongsWithRebase, no rebase needed                 1              1           0       2086.2           0.5       1.9X
+
+
+================================================================================================
+Variable-length reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Variable-length reads:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBinary, payloadLen=8                             15             15           3         72.0          13.9       1.0X
+readBinary, payloadLen=32                            15             15           3         70.0          14.3       1.0X
+readBinary, payloadLen=128                           17             18           2         60.3          16.6       0.8X
+readBinary, payloadLen=512                           38             39           1         27.6          36.2       0.4X
+
+
+================================================================================================
+Single-value reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Single-value reads:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBoolean                                           1              1           0       1842.8           0.5       1.0X
+readByte                                              1              1           0        917.1           1.1       0.5X
+readShort                                             1              1           0        916.8           1.1       0.5X
+readInteger                                           1              1           0        917.1           1.1       0.5X
+readLong                                              1              1           0        917.1           1.1       0.5X
+readFloat                                             1              1           0        917.1           1.1       0.5X
+readDouble                                            1              1           0        917.0           1.1       0.5X
+
+
+================================================================================================
+Skip
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Skip:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+skipBinary, payloadLen=8                              5              5           0        214.0           4.7       1.0X
+skipBinary, payloadLen=32                             5              5           0        210.8           4.7       1.0X
+skipBinary, payloadLen=128                            9              9           0        119.3           8.4       0.6X
+skipBinary, payloadLen=512                            8              9           1        124.0           8.1       0.6X
+skipFixedLenByteArray, len=4                          0              0           0   26214400.0           0.0  122511.1X
+skipFixedLenByteArray, len=16                         0              0           0   26214400.0           0.0  122511.1X
+skipFixedLenByteArray, len=64                         0              0           0   26214400.0           0.0  122511.1X
+skipBooleans                                          0              0           0   26214400.0           0.0  122511.1X
+skipBytes                                             0              0           0   26214400.0           0.0  122511.1X
+skipShorts                                            0              0           0   26214400.0           0.0  122511.1X
+skipIntegers                                          0              0           0   26214400.0           0.0  122511.1X
+skipLongs                                             0              0           0   26214400.0           0.0  122511.1X
+skipFloats                                            0              0           0   26214400.0           0.0  122511.1X
+skipDoubles                                           0              0           0   26214400.0           0.0  122511.1X
+
+
diff --git a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt
index acdf8d9b51737..cb53e9dd5b2a2 100644
--- a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk21-results.txt
@@ -2,81 +2,153 @@
 Boolean decode
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 RLE readBooleans decode:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cold reader, trueRatio=0.0                            0              0           0      25171.1           0.0       1.0X
-reused reader, trueRatio=0.0                          0              0           0      25171.1           0.0       1.0X
-cold reader, trueRatio=0.1                            1              1           0        755.2           1.3       0.0X
-reused reader, trueRatio=0.1                          1              1           0        754.1           1.3       0.0X
-cold reader, trueRatio=0.5                            1              1           0        835.6           1.2       0.0X
-reused reader, trueRatio=0.5                          1              1           0        833.3           1.2       0.0X
-cold reader, trueRatio=0.9                            1              1           0        753.3           1.3       0.0X
-reused reader, trueRatio=0.9                          1              1           0        753.6           1.3       0.0X
-cold reader, trueRatio=1.0                            0              0           0      25165.0           0.0       1.0X
-reused reader, trueRatio=1.0                          0              0           0      25183.2           0.0       1.0X
+cold reader, trueRatio=0.0                            0              0           0      59466.7           0.0       1.0X
+reused reader, trueRatio=0.0                          0              0           0      82871.7           0.0       1.4X
+cold reader, trueRatio=0.1                            1              1           0        744.9           1.3       0.0X
+reused reader, trueRatio=0.1                          1              1           0        746.1           1.3       0.0X
+cold reader, trueRatio=0.5                            1              1           0        826.2           1.2       0.0X
+reused reader, trueRatio=0.5                          1              1           0        828.1           1.2       0.0X
+cold reader, trueRatio=0.9                            1              1           0        743.5           1.3       0.0X
+reused reader, trueRatio=0.9                          1              1           0        738.4           1.4       0.0X
+cold reader, trueRatio=1.0                            0              0           0      82409.3           0.0       1.4X
+reused reader, trueRatio=1.0                          0              0           0      82871.7           0.0       1.4X
 
 
 ================================================================================================
 Integer decode
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 RLE readIntegers dictionary-id decode:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-PACKED cold, bitWidth=4                               2              2           0        478.0           2.1       1.0X
-PACKED reused, bitWidth=4                             2              2           0        475.7           2.1       1.0X
-RLE, bitWidth=4                                       0              0           0       4508.2           0.2       9.4X
-PACKED cold, bitWidth=8                               2              2           0        518.0           1.9       1.1X
-PACKED reused, bitWidth=8                             2              2           0        515.7           1.9       1.1X
-RLE, bitWidth=8                                       0              0           0       4508.9           0.2       9.4X
-PACKED cold, bitWidth=12                              8              8           0        136.1           7.3       0.3X
-PACKED reused, bitWidth=12                            8              8           0        136.1           7.3       0.3X
-RLE, bitWidth=12                                      0              0           0       4503.7           0.2       9.4X
-PACKED cold, bitWidth=20                              3              3           0        353.3           2.8       0.7X
-PACKED reused, bitWidth=20                            3              3           0        352.1           2.8       0.7X
-RLE, bitWidth=20                                      0              0           0       4508.9           0.2       9.4X
+PACKED cold, bitWidth=4                               2              2           0        489.7           2.0       1.0X
+PACKED reused, bitWidth=4                             2              2           0        487.7           2.1       1.0X
+RLE, bitWidth=4                                       0              0           0       4506.8           0.2       9.2X
+PACKED cold, bitWidth=8                               2              2           0        524.2           1.9       1.1X
+PACKED reused, bitWidth=8                             2              2           0        524.6           1.9       1.1X
+RLE, bitWidth=8                                       0              0           0       4507.0           0.2       9.2X
+PACKED cold, bitWidth=12                              3              3           0        417.6           2.4       0.9X
+PACKED reused, bitWidth=12                            3              3           0        415.4           2.4       0.8X
+RLE, bitWidth=12                                      0              0           0       4507.2           0.2       9.2X
+PACKED cold, bitWidth=20                              3              3           0        351.9           2.8       0.7X
+PACKED reused, bitWidth=20                            3              3           0        349.4           2.9       0.7X
+RLE, bitWidth=20                                      0              0           0       4499.6           0.2       9.2X
 
 
 ================================================================================================
 Nullable batch decode with def-level materialization
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 Nullable batch with def-levels:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-nullRatio=0.0, n/a                                    0              0           0       6502.7           0.2       1.0X
-nullRatio=0.1, random                                 8              9           0        123.7           8.1       0.0X
-nullRatio=0.1, clustered                              6              6           0        169.7           5.9       0.0X
-nullRatio=0.3, random                                12             12           0         87.3          11.5       0.0X
-nullRatio=0.3, clustered                              6              6           0        168.0           6.0       0.0X
-nullRatio=0.5, random                                13             13           0         79.8          12.5       0.0X
-nullRatio=0.5, clustered                              6              6           0        171.7           5.8       0.0X
-nullRatio=0.9, random                                 8              8           0        136.0           7.4       0.0X
-nullRatio=0.9, clustered                              6              6           0        181.6           5.5       0.0X
-nullRatio=1.0, random                                 0              0           0       5072.8           0.2       0.8X
+nullRatio=0.0, n/a                                    0              0           0       6695.3           0.1       1.0X
+nullRatio=0.1, random                                 9              9           0        123.2           8.1       0.0X
+nullRatio=0.1, clustered                              6              6           1        174.1           5.7       0.0X
+nullRatio=0.3, random                                12             12           0         85.3          11.7       0.0X
+nullRatio=0.3, clustered                              6              6           0        172.7           5.8       0.0X
+nullRatio=0.5, random                                14             14           0         76.5          13.1       0.0X
+nullRatio=0.5, clustered                              6              6           0        173.6           5.8       0.0X
+nullRatio=0.9, random                                 8              8           0        132.0           7.6       0.0X
+nullRatio=0.9, clustered                              6              6           0        182.4           5.5       0.0X
+nullRatio=1.0, random                                 0              0           0       5048.8           0.2       0.8X
 
 
 ================================================================================================
 Nullable batch decode without def-level materialization
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 Nullable batch without def-levels:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-nullRatio=0.0, n/a                                    0              0           0      11512.6           0.1       1.0X
-nullRatio=0.1, random                                 7              7           0        144.6           6.9       0.0X
-nullRatio=0.1, clustered                              5              5           0        197.2           5.1       0.0X
-nullRatio=0.3, random                                11             11           0         99.6          10.0       0.0X
-nullRatio=0.3, clustered                              6              6           0        189.8           5.3       0.0X
-nullRatio=0.5, random                                12             12           0         89.0          11.2       0.0X
-nullRatio=0.5, clustered                              5              5           0        194.7           5.1       0.0X
-nullRatio=0.9, random                                 7              7           0        151.9           6.6       0.0X
-nullRatio=0.9, clustered                              5              5           0        200.5           5.0       0.0X
-nullRatio=1.0, random                                 0              0           0      11945.0           0.1       1.0X
+nullRatio=0.0, n/a                                    0              0           0      12199.7           0.1       1.0X
+nullRatio=0.1, random                                 7              7           0        147.8           6.8       0.0X
+nullRatio=0.1, clustered                              5              5           0        204.6           4.9       0.0X
+nullRatio=0.3, random                                10             10           0        100.8           9.9       0.0X
+nullRatio=0.3, clustered                              5              5           0        200.6           5.0       0.0X
+nullRatio=0.5, random                                12             12           0         89.4          11.2       0.0X
+nullRatio=0.5, clustered                              5              5           0        199.3           5.0       0.0X
+nullRatio=0.9, random                                 7              7           0        153.3           6.5       0.0X
+nullRatio=0.9, clustered                              5              5           0        202.2           4.9       0.0X
+nullRatio=1.0, random                                 0              0           0      11887.9           0.1       1.0X
+
+
+================================================================================================
+Nullable batch decode with row-index filtering (with def-levels)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Nullable batch with def-levels, row-index filtered:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+----------------------------------------------------------------------------------------------------------------------------------
+nullRatio=0.0, contiguous 50%                                   1              1           0        757.3           1.3       1.0X
+nullRatio=0.3, contiguous 50%                                   9              9           0        119.6           8.4       0.2X
+nullRatio=0.9, contiguous 50%                                   7              7           0        158.9           6.3       0.2X
+nullRatio=0.0, alt 1000-row windows                             3              3           0        377.7           2.6       0.5X
+nullRatio=0.3, alt 1000-row windows                            10             10           0        102.3           9.8       0.1X
+nullRatio=0.9, alt 1000-row windows                             8              8           1        130.9           7.6       0.2X
+
+
+================================================================================================
+Nullable batch decode with row-index filtering (without def-levels)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Nullable batch without def-levels, row-index filtered:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------------
+nullRatio=0.0, contiguous 50%                                      1              2           0        767.0           1.3       1.0X
+nullRatio=0.3, contiguous 50%                                      8              8           0        129.1           7.7       0.2X
+nullRatio=0.9, contiguous 50%                                      6              7           0        166.0           6.0       0.2X
+nullRatio=0.0, alt 1000-row windows                                3              3           0        377.2           2.7       0.5X
+nullRatio=0.3, alt 1000-row windows                               10             10           0        109.0           9.2       0.1X
+nullRatio=0.9, alt 1000-row windows                                8              8           0        137.5           7.3       0.2X
+
+
+================================================================================================
+Single-value reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Single-value reads:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBoolean                                           3              3           0        311.5           3.2       1.0X
+readInteger, bitWidth=4                               4              4           0        275.7           3.6       0.9X
+readValueDictionaryId, bitWidth=4                     4              4           0        276.2           3.6       0.9X
+readInteger, bitWidth=8                               4              4           0        289.2           3.5       0.9X
+readValueDictionaryId, bitWidth=8                     4              4           0        289.8           3.5       0.9X
+readInteger, bitWidth=12                              4              4           0        252.3           4.0       0.8X
+readValueDictionaryId, bitWidth=12                    4              4           0        252.1           4.0       0.8X
+readInteger, bitWidth=20                              5              5           0        227.7           4.4       0.7X
+readValueDictionaryId, bitWidth=20                    5              5           0        227.2           4.4       0.7X
+
+
+================================================================================================
+Skip
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Skip:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+skipBooleans, trueRatio=0.0                           0              0           0   26214400.0           0.0       1.0X
+skipBooleans, trueRatio=0.5                           2              2           0        559.1           1.8       0.0X
+skipBooleans, trueRatio=1.0                           0              0           0   26214400.0           0.0       1.0X
+skipIntegers PACKED, bitWidth=4                       2              2           0        502.4           2.0       0.0X
+skipIntegers RLE, bitWidth=4                          0              0           0   21399510.2           0.0       0.8X
+skipIntegers PACKED, bitWidth=8                       2              2           0        551.4           1.8       0.0X
+skipIntegers RLE, bitWidth=8                          0              0           0   21399510.2           0.0       0.8X
+skipIntegers PACKED, bitWidth=12                      2              2           0        431.5           2.3       0.0X
+skipIntegers RLE, bitWidth=12                         0              0           0   21399510.2           0.0       0.8X
+skipIntegers PACKED, bitWidth=20                      3              3           0        364.1           2.7       0.0X
+skipIntegers RLE, bitWidth=20                         0              0           0   21399510.2           0.0       0.8X
 
 
diff --git a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt
index 2b245cb206303..3029a4b3268b9 100644
--- a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt
+++ b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-jdk25-results.txt
@@ -2,81 +2,153 @@
 Boolean decode
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
 AMD EPYC 7763 64-Core Processor
 RLE readBooleans decode:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cold reader, trueRatio=0.0                            0              0           0      60530.9           0.0       1.0X
-reused reader, trueRatio=0.0                          0              0           0      56821.1           0.0       0.9X
-cold reader, trueRatio=0.1                            1              1           0        709.0           1.4       0.0X
-reused reader, trueRatio=0.1                          1              1           0        708.2           1.4       0.0X
-cold reader, trueRatio=0.5                            1              1           0        747.9           1.3       0.0X
-reused reader, trueRatio=0.5                          1              1           0        746.5           1.3       0.0X
-cold reader, trueRatio=0.9                            1              1           0        709.0           1.4       0.0X
-reused reader, trueRatio=0.9                          1              2           0        705.6           1.4       0.0X
-cold reader, trueRatio=1.0                            0              0           0      59164.7           0.0       1.0X
-reused reader, trueRatio=1.0                          0              0           0      58832.7           0.0       1.0X
+cold reader, trueRatio=0.0                            0              0           0       5323.3           0.2       1.0X
+reused reader, trueRatio=0.0                          0              0           0       4464.5           0.2       0.8X
+cold reader, trueRatio=0.1                            2              2           0        669.3           1.5       0.1X
+reused reader, trueRatio=0.1                          2              2           0        672.6           1.5       0.1X
+cold reader, trueRatio=0.5                            1              1           0        722.5           1.4       0.1X
+reused reader, trueRatio=0.5                          1              1           0        724.3           1.4       0.1X
+cold reader, trueRatio=0.9                            2              2           0        669.6           1.5       0.1X
+reused reader, trueRatio=0.9                          2              2           0        674.0           1.5       0.1X
+cold reader, trueRatio=1.0                            0              0           0       4574.1           0.2       0.9X
+reused reader, trueRatio=1.0                          0              0           0       4460.9           0.2       0.8X
 
 
 ================================================================================================
 Integer decode
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
 AMD EPYC 7763 64-Core Processor
 RLE readIntegers dictionary-id decode:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-PACKED cold, bitWidth=4                               2              2           0        517.0           1.9       1.0X
-PACKED reused, bitWidth=4                             2              2           0        518.3           1.9       1.0X
-RLE, bitWidth=4                                       0              0           0      18145.5           0.1      35.1X
+PACKED cold, bitWidth=4                               2              2           0        516.7           1.9       1.0X
+PACKED reused, bitWidth=4                             2              2           0        516.1           1.9       1.0X
+RLE, bitWidth=4                                       0              0           0      18696.2           0.1      36.2X
 PACKED cold, bitWidth=8                               2              2           0        570.0           1.8       1.1X
-PACKED reused, bitWidth=8                             2              2           0        556.9           1.8       1.1X
-RLE, bitWidth=8                                       0              0           0      18098.2           0.1      35.0X
-PACKED cold, bitWidth=12                              2              2           0        454.5           2.2       0.9X
-PACKED reused, bitWidth=12                            2              2           0        453.0           2.2       0.9X
-RLE, bitWidth=12                                      0              0           0      18164.4           0.1      35.1X
-PACKED cold, bitWidth=20                              3              3           0        374.6           2.7       0.7X
-PACKED reused, bitWidth=20                            3              3           0        374.2           2.7       0.7X
-RLE, bitWidth=20                                      0              0           0      18462.1           0.1      35.7X
+PACKED reused, bitWidth=8                             2              2           0        567.0           1.8       1.1X
+RLE, bitWidth=8                                       0              0           0      18583.5           0.1      36.0X
+PACKED cold, bitWidth=12                              2              2           0        454.6           2.2       0.9X
+PACKED reused, bitWidth=12                            2              2           0        452.6           2.2       0.9X
+RLE, bitWidth=12                                      0              0           0      18696.2           0.1      36.2X
+PACKED cold, bitWidth=20                              3              3           0        373.2           2.7       0.7X
+PACKED reused, bitWidth=20                            3              3           0        369.4           2.7       0.7X
+RLE, bitWidth=20                                      0              0           0      15516.8           0.1      30.0X
 
 
 ================================================================================================
 Nullable batch decode with def-level materialization
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
 AMD EPYC 7763 64-Core Processor
 Nullable batch with def-levels:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-nullRatio=0.0, n/a                                    0              0           0       6625.9           0.2       1.0X
-nullRatio=0.1, random                                 9              9           0        121.8           8.2       0.0X
-nullRatio=0.1, clustered                              6              6           0        170.3           5.9       0.0X
-nullRatio=0.3, random                                13             13           0         83.8          11.9       0.0X
-nullRatio=0.3, clustered                              6              6           0        170.4           5.9       0.0X
-nullRatio=0.5, random                                14             14           0         75.8          13.2       0.0X
-nullRatio=0.5, clustered                              6              6           0        172.1           5.8       0.0X
-nullRatio=0.9, random                                 8              8           0        131.0           7.6       0.0X
-nullRatio=0.9, clustered                              6              6           0        181.7           5.5       0.0X
-nullRatio=1.0, random                                 0              0           0       8305.2           0.1       1.3X
+nullRatio=0.0, n/a                                    0              0           0       6608.2           0.2       1.0X
+nullRatio=0.1, random                                 9              9           0        119.1           8.4       0.0X
+nullRatio=0.1, clustered                              6              6           0        166.2           6.0       0.0X
+nullRatio=0.3, random                                13             13           0         81.2          12.3       0.0X
+nullRatio=0.3, clustered                              6              6           0        166.0           6.0       0.0X
+nullRatio=0.5, random                                15             15           1         71.3          14.0       0.0X
+nullRatio=0.5, clustered                              6              6           0        166.6           6.0       0.0X
+nullRatio=0.9, random                                 8              8           0        127.6           7.8       0.0X
+nullRatio=0.9, clustered                              6              6           0        175.5           5.7       0.0X
+nullRatio=1.0, random                                 0              0           0       8275.6           0.1       1.3X
 
 
 ================================================================================================
 Nullable batch decode without def-level materialization
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
 AMD EPYC 7763 64-Core Processor
 Nullable batch without def-levels:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-nullRatio=0.0, n/a                                    0              0           0      11869.8           0.1       1.0X
-nullRatio=0.1, random                                 7              7           0        149.1           6.7       0.0X
-nullRatio=0.1, clustered                              5              5           0        208.1           4.8       0.0X
-nullRatio=0.3, random                                10             10           0        101.1           9.9       0.0X
-nullRatio=0.3, clustered                              5              5           0        206.6           4.8       0.0X
-nullRatio=0.5, random                                12             12           0         90.7          11.0       0.0X
-nullRatio=0.5, clustered                              5              5           0        206.4           4.8       0.0X
-nullRatio=0.9, random                                 7              7           0        160.2           6.2       0.0X
-nullRatio=0.9, clustered                              5              5           0        213.5           4.7       0.0X
-nullRatio=1.0, random                                 0              0           0      11957.2           0.1       1.0X
+nullRatio=0.0, n/a                                    0              0           0      11464.6           0.1       1.0X
+nullRatio=0.1, random                                 7              7           0        148.7           6.7       0.0X
+nullRatio=0.1, clustered                              5              5           0        207.2           4.8       0.0X
+nullRatio=0.3, random                                10             10           0        100.6           9.9       0.0X
+nullRatio=0.3, clustered                              5              5           0        204.8           4.9       0.0X
+nullRatio=0.5, random                                12             12           0         90.6          11.0       0.0X
+nullRatio=0.5, clustered                              5              5           0        205.1           4.9       0.0X
+nullRatio=0.9, random                                 7              7           0        158.9           6.3       0.0X
+nullRatio=0.9, clustered                              5              5           0        212.4           4.7       0.0X
+nullRatio=1.0, random                                 0              0           0      11983.2           0.1       1.0X
+
+
+================================================================================================
+Nullable batch decode with row-index filtering (with def-levels)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+Nullable batch with def-levels, row-index filtered:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+----------------------------------------------------------------------------------------------------------------------------------
+nullRatio=0.0, contiguous 50%                                   1              1           0        762.9           1.3       1.0X
+nullRatio=0.3, contiguous 50%                                   9              9           0        116.5           8.6       0.2X
+nullRatio=0.9, contiguous 50%                                   7              7           0        156.2           6.4       0.2X
+nullRatio=0.0, alt 1000-row windows                             2              2           0        433.5           2.3       0.6X
+nullRatio=0.3, alt 1000-row windows                            10             10           0        103.5           9.7       0.1X
+nullRatio=0.9, alt 1000-row windows                             8              8           1        136.1           7.3       0.2X
+
+
+================================================================================================
+Nullable batch decode with row-index filtering (without def-levels)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+Nullable batch without def-levels, row-index filtered:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------------
+nullRatio=0.0, contiguous 50%                                      1              2           0        732.5           1.4       1.0X
+nullRatio=0.3, contiguous 50%                                      8              8           0        131.7           7.6       0.2X
+nullRatio=0.9, contiguous 50%                                      6              6           0        173.9           5.8       0.2X
+nullRatio=0.0, alt 1000-row windows                                2              2           0        423.8           2.4       0.6X
+nullRatio=0.3, alt 1000-row windows                                9              9           0        115.8           8.6       0.2X
+nullRatio=0.9, alt 1000-row windows                                7              7           0        147.6           6.8       0.2X
+
+
+================================================================================================
+Single-value reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+Single-value reads:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBoolean                                           3              3           0        321.6           3.1       1.0X
+readInteger, bitWidth=4                               3              4           0        305.3           3.3       0.9X
+readValueDictionaryId, bitWidth=4                     3              4           0        305.0           3.3       0.9X
+readInteger, bitWidth=8                               3              3           0        322.7           3.1       1.0X
+readValueDictionaryId, bitWidth=8                     3              3           0        322.7           3.1       1.0X
+readInteger, bitWidth=12                              4              4           0        282.4           3.5       0.9X
+readValueDictionaryId, bitWidth=12                    4              4           0        282.4           3.5       0.9X
+readInteger, bitWidth=20                              4              4           0        248.5           4.0       0.8X
+readValueDictionaryId, bitWidth=20                    4              4           0        248.4           4.0       0.8X
+
+
+================================================================================================
+Skip
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1011-azure
+AMD EPYC 7763 64-Core Processor
+Skip:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+skipBooleans, trueRatio=0.0                           0              0           0   26214400.0           0.0       1.0X
+skipBooleans, trueRatio=0.5                           2              2           0        621.6           1.6       0.0X
+skipBooleans, trueRatio=1.0                           0              0           0   26214400.0           0.0       1.0X
+skipIntegers PACKED, bitWidth=4                       2              2           0        537.5           1.9       0.0X
+skipIntegers RLE, bitWidth=4                          0              0           0   26214400.0           0.0       1.0X
+skipIntegers PACKED, bitWidth=8                       2              2           0        599.4           1.7       0.0X
+skipIntegers RLE, bitWidth=8                          0              0           0   26214400.0           0.0       1.0X
+skipIntegers PACKED, bitWidth=12                      2              2           0        471.2           2.1       0.0X
+skipIntegers RLE, bitWidth=12                         0              0           0   21399510.2           0.0       0.8X
+skipIntegers PACKED, bitWidth=20                      3              3           0        384.7           2.6       0.0X
+skipIntegers RLE, bitWidth=20                         0              0           0   21399510.2           0.0       0.8X
 
 
diff --git a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt
index 10def40f3e1e8..749296283bd35 100644
--- a/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt
+++ b/sql/core/benchmarks/VectorizedRleValuesReaderBenchmark-results.txt
@@ -2,81 +2,153 @@
 Boolean decode
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 RLE readBooleans decode:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-cold reader, trueRatio=0.0                            0              0           0      47464.1           0.0       1.0X
-reused reader, trueRatio=0.0                          0              0           0      47485.6           0.0       1.0X
-cold reader, trueRatio=0.1                            1              1           0        894.1           1.1       0.0X
-reused reader, trueRatio=0.1                          1              1           0        897.4           1.1       0.0X
-cold reader, trueRatio=0.5                            1              1           0       1027.8           1.0       0.0X
-reused reader, trueRatio=0.5                          1              1           0       1029.3           1.0       0.0X
-cold reader, trueRatio=0.9                            1              1           0        893.8           1.1       0.0X
-reused reader, trueRatio=0.9                          1              1           0        896.6           1.1       0.0X
-cold reader, trueRatio=1.0                            0              0           0      47421.1           0.0       1.0X
-reused reader, trueRatio=1.0                          0              0           0      47485.6           0.0       1.0X
+cold reader, trueRatio=0.0                            0              0           0      66239.8           0.0       1.0X
+reused reader, trueRatio=0.0                          0              0           0      57887.6           0.0       0.9X
+cold reader, trueRatio=0.1                            1              1           0        893.5           1.1       0.0X
+reused reader, trueRatio=0.1                          1              1           0        895.6           1.1       0.0X
+cold reader, trueRatio=0.5                            1              1           0       1018.7           1.0       0.0X
+reused reader, trueRatio=0.5                          1              1           0       1029.4           1.0       0.0X
+cold reader, trueRatio=0.9                            1              1           0        891.9           1.1       0.0X
+reused reader, trueRatio=0.9                          1              1           0        894.8           1.1       0.0X
+cold reader, trueRatio=1.0                            0              0           0      67001.7           0.0       1.0X
+reused reader, trueRatio=1.0                          0              0           0      72380.5           0.0       1.1X
 
 
 ================================================================================================
 Integer decode
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 RLE readIntegers dictionary-id decode:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-PACKED cold, bitWidth=4                               2              2           0        488.1           2.0       1.0X
-PACKED reused, bitWidth=4                             2              2           0        485.9           2.1       1.0X
-RLE, bitWidth=4                                       0              0           0      19116.1           0.1      39.2X
-PACKED cold, bitWidth=8                               2              2           0        482.1           2.1       1.0X
-PACKED reused, bitWidth=8                             2              2           0        479.7           2.1       1.0X
-RLE, bitWidth=8                                       0              0           0      18567.1           0.1      38.0X
-PACKED cold, bitWidth=12                              3              3           0        362.1           2.8       0.7X
-PACKED reused, bitWidth=12                            3              3           0        361.3           2.8       0.7X
-RLE, bitWidth=12                                      0              0           0      19126.9           0.1      39.2X
-PACKED cold, bitWidth=20                              3              3           0        308.6           3.2       0.6X
-PACKED reused, bitWidth=20                            3              3           0        306.5           3.3       0.6X
-RLE, bitWidth=20                                      0              0           0      19074.4           0.1      39.1X
+PACKED cold, bitWidth=4                               2              2           0        505.6           2.0       1.0X
+PACKED reused, bitWidth=4                             2              2           0        504.6           2.0       1.0X
+RLE, bitWidth=4                                       0              0           0      18249.1           0.1      36.1X
+PACKED cold, bitWidth=8                               2              2           0        497.6           2.0       1.0X
+PACKED reused, bitWidth=8                             2              2           0        496.2           2.0       1.0X
+RLE, bitWidth=8                                       0              0           0      18123.0           0.1      35.8X
+PACKED cold, bitWidth=12                              3              3           0        370.2           2.7       0.7X
+PACKED reused, bitWidth=12                            3              3           0        369.6           2.7       0.7X
+RLE, bitWidth=12                                      0              0           0      18573.3           0.1      36.7X
+PACKED cold, bitWidth=20                              3              3           0        315.1           3.2       0.6X
+PACKED reused, bitWidth=20                            3              3           0        316.2           3.2       0.6X
+RLE, bitWidth=20                                      0              0           0      18570.0           0.1      36.7X
 
 
 ================================================================================================
 Nullable batch decode with def-level materialization
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 Nullable batch with def-levels:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-nullRatio=0.0, n/a                                    0              0           0       6441.9           0.2       1.0X
-nullRatio=0.1, random                                 9              9           0        113.0           8.9       0.0X
-nullRatio=0.1, clustered                              7              7           0        144.8           6.9       0.0X
-nullRatio=0.3, random                                13             13           0         78.9          12.7       0.0X
-nullRatio=0.3, clustered                              7              7           0        156.6           6.4       0.0X
-nullRatio=0.5, random                                15             15           0         71.9          13.9       0.0X
-nullRatio=0.5, clustered                              7              7           0        159.2           6.3       0.0X
-nullRatio=0.9, random                                 8              8           0        124.8           8.0       0.0X
-nullRatio=0.9, clustered                              6              6           0        171.3           5.8       0.0X
-nullRatio=1.0, random                                 0              0           0       8031.7           0.1       1.2X
+nullRatio=0.0, n/a                                    0              0           0       6431.5           0.2       1.0X
+nullRatio=0.1, random                                 9              9           0        114.6           8.7       0.0X
+nullRatio=0.1, clustered                              7              7           0        159.7           6.3       0.0X
+nullRatio=0.3, random                                13             13           0         80.1          12.5       0.0X
+nullRatio=0.3, clustered                              7              7           1        157.8           6.3       0.0X
+nullRatio=0.5, random                                14             15           0         72.7          13.7       0.0X
+nullRatio=0.5, clustered                              6              7           0        162.0           6.2       0.0X
+nullRatio=0.9, random                                 8              8           0        126.4           7.9       0.0X
+nullRatio=0.9, clustered                              6              6           0        174.0           5.7       0.0X
+nullRatio=1.0, random                                 0              0           0       8062.5           0.1       1.3X
 
 
 ================================================================================================
 Nullable batch decode without def-level materialization
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.17.0-1010-azure
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
 AMD EPYC 7763 64-Core Processor
 Nullable batch without def-levels:        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-nullRatio=0.0, n/a                                    0              0           0      11037.9           0.1       1.0X
-nullRatio=0.1, random                                 8              8           0        139.2           7.2       0.0X
-nullRatio=0.1, clustered                              6              6           0        190.1           5.3       0.0X
-nullRatio=0.3, random                                11             11           0         96.7          10.3       0.0X
-nullRatio=0.3, clustered                              6              6           0        188.5           5.3       0.0X
-nullRatio=0.5, random                                12             12           0         86.9          11.5       0.0X
-nullRatio=0.5, clustered                              6              6           0        188.0           5.3       0.0X
-nullRatio=0.9, random                                 7              7           0        149.4           6.7       0.0X
-nullRatio=0.9, clustered                              5              5           0        197.9           5.1       0.0X
-nullRatio=1.0, random                                 0              0           0      11675.8           0.1       1.1X
+nullRatio=0.0, n/a                                    0              0           0      11054.0           0.1       1.0X
+nullRatio=0.1, random                                 7              8           0        140.6           7.1       0.0X
+nullRatio=0.1, clustered                              5              5           0        193.2           5.2       0.0X
+nullRatio=0.3, random                                11             11           0         97.4          10.3       0.0X
+nullRatio=0.3, clustered                              6              6           0        184.4           5.4       0.0X
+nullRatio=0.5, random                                12             12           0         87.7          11.4       0.0X
+nullRatio=0.5, clustered                              5              6           0        191.6           5.2       0.0X
+nullRatio=0.9, random                                 7              7           0        151.7           6.6       0.0X
+nullRatio=0.9, clustered                              5              5           0        200.8           5.0       0.0X
+nullRatio=1.0, random                                 0              0           0      11662.5           0.1       1.1X
+
+
+================================================================================================
+Nullable batch decode with row-index filtering (with def-levels)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Nullable batch with def-levels, row-index filtered:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+----------------------------------------------------------------------------------------------------------------------------------
+nullRatio=0.0, contiguous 50%                                   1              2           0        763.7           1.3       1.0X
+nullRatio=0.3, contiguous 50%                                   9              9           0        117.3           8.5       0.2X
+nullRatio=0.9, contiguous 50%                                   7              7           0        157.5           6.3       0.2X
+nullRatio=0.0, alt 1000-row windows                             3              3           0        418.8           2.4       0.5X
+nullRatio=0.3, alt 1000-row windows                            10             10           0        103.8           9.6       0.1X
+nullRatio=0.9, alt 1000-row windows                             8              8           0        134.7           7.4       0.2X
+
+
+================================================================================================
+Nullable batch decode with row-index filtering (without def-levels)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Nullable batch without def-levels, row-index filtered:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------------
+nullRatio=0.0, contiguous 50%                                      1              1           0        865.4           1.2       1.0X
+nullRatio=0.3, contiguous 50%                                      8              8           0        128.8           7.8       0.1X
+nullRatio=0.9, contiguous 50%                                      6              6           0        173.4           5.8       0.2X
+nullRatio=0.0, alt 1000-row windows                                2              2           0        425.9           2.3       0.5X
+nullRatio=0.3, alt 1000-row windows                                9              9           0        111.3           9.0       0.1X
+nullRatio=0.9, alt 1000-row windows                                7              7           0        143.3           7.0       0.2X
+
+
+================================================================================================
+Single-value reads
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Single-value reads:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+readBoolean                                           4              4           0        287.0           3.5       1.0X
+readInteger, bitWidth=4                               4              4           0        275.7           3.6       1.0X
+readValueDictionaryId, bitWidth=4                     4              4           0        275.5           3.6       1.0X
+readInteger, bitWidth=8                               4              4           0        273.7           3.7       1.0X
+readValueDictionaryId, bitWidth=8                     4              4           0        273.4           3.7       1.0X
+readInteger, bitWidth=12                              5              5           1        230.1           4.3       0.8X
+readValueDictionaryId, bitWidth=12                    5              5           0        229.3           4.4       0.8X
+readInteger, bitWidth=20                              5              5           0        207.9           4.8       0.7X
+readValueDictionaryId, bitWidth=20                    5              5           0        207.3           4.8       0.7X
+
+
+================================================================================================
+Skip
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+Skip:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+skipBooleans, trueRatio=0.0                           0              0           0   20971520.0           0.0       1.0X
+skipBooleans, trueRatio=0.5                           2              2           0        569.4           1.8       0.0X
+skipBooleans, trueRatio=1.0                           0              0           0   21399510.2           0.0       1.0X
+skipIntegers PACKED, bitWidth=4                       2              2           0        522.7           1.9       0.0X
+skipIntegers RLE, bitWidth=4                          0              0           0   20971520.0           0.0       1.0X
+skipIntegers PACKED, bitWidth=8                       2              2           0        516.6           1.9       0.0X
+skipIntegers RLE, bitWidth=8                          0              0           0   21399510.2           0.0       1.0X
+skipIntegers PACKED, bitWidth=12                      3              3           0        382.4           2.6       0.0X
+skipIntegers RLE, bitWidth=12                         0              0           0   17476266.7           0.0       0.8X
+skipIntegers PACKED, bitWidth=20                      3              3           0        323.0           3.1       0.0X
+skipIntegers RLE, bitWidth=20                         0              0           0   17476266.7           0.0       0.8X
 
 
diff --git a/sql/core/benchmarks/WindowBenchmark-jdk21-results.txt b/sql/core/benchmarks/WindowBenchmark-jdk21-results.txt
new file mode 100644
index 0000000000000..8838a4a9a0fe9
--- /dev/null
+++ b/sql/core/benchmarks/WindowBenchmark-jdk21-results.txt
@@ -0,0 +1,172 @@
+================================================================================================
+Section A - MIN (non-invertible)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+MIN sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+MIN naive (current, baseline)                      4112           4231         227          0.1       15684.2       1.0X
+MIN segtree (default)                               428            447          20          0.6        1633.2       9.6X
+MIN segtree (blockSize=256)                        2271           2290          17          0.1        8664.1       1.8X
+
+
+================================================================================================
+Section A - MAX (non-invertible)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+MAX sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+MAX naive (current, baseline)                      4141           4154          14          0.1       15794.9       1.0X
+MAX segtree (default)                               418            430          11          0.6        1596.1       9.9X
+MAX segtree (blockSize=256)                        2266           2284          18          0.1        8645.3       1.8X
+
+
+================================================================================================
+Section A - SUM (Spark has no inverse; full recompute)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive (current, baseline)                      4120           4128           7          0.1       15716.6       1.0X
+SUM segtree (default)                               420            444          17          0.6        1601.8       9.8X
+SUM segtree (blockSize=256)                        2290           2305          12          0.1        8734.2       1.8X
+
+
+================================================================================================
+Section A - COUNT
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+COUNT sliding window, W=1001, 256K rows:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+COUNT naive (current, baseline)                    3561           3577          14          0.1       13583.0       1.0X
+COUNT segtree (default)                             384            400          12          0.7        1466.3       9.3X
+COUNT segtree (blockSize=256)                      2232           2251          18          0.1        8515.2       1.6X
+
+
+================================================================================================
+Section A - AVG (multi-buffer)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+AVG sliding window, W=1001, 192K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+AVG naive (current, baseline)                      4288           4302          16          0.0       21808.7       1.0X
+AVG segtree (default)                               426            435          11          0.5        2168.3      10.1X
+AVG segtree (blockSize=256)                        1515           1528          13          0.1        7706.3       2.8X
+
+
+================================================================================================
+Section A - STDDEV_SAMP (multi-buffer, stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+STDDEV_SAMP sliding window, W=1001, 2M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+STDDEV_SAMP naive (current, baseline)                        100695         100937         361          0.0       50347.4       1.0X
+STDDEV_SAMP segtree (default)                                  5459           5475          15          0.4        2729.3      18.4X
+STDDEV_SAMP segtree (blockSize=256)                          111304         111341          33          0.0       55651.8       0.9X
+
+
+================================================================================================
+Section B - W=10 scaling (stress: Pareto loss zone)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM scaling, W=11, 2M rows (stress):      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=11                                      924            948          23          2.2         461.9       1.0X
+SUM segtree (default) W=11                         2262           2277          14          0.9        1130.8       0.4X
+
+
+================================================================================================
+Section B - W=50 scaling (stress: Pareto loss zone)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM scaling, W=51, 2M rows (stress):      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=51                                     2157           2169          15          0.9        1078.5       1.0X
+SUM segtree (default) W=51                         2558           2571          16          0.8        1279.1       0.8X
+
+
+================================================================================================
+Section B - W=201 scaling
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM scaling, W=201, 1M rows:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=201                                    3379           3404          16          0.3        3378.8       1.0X
+SUM segtree (default) W=201                        1430           1454          24          0.7        1429.9       2.4X
+
+
+================================================================================================
+Section B - W=4001 scaling (stress, + bs=256 cross-block)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM scaling, W=4001, 2M rows (stress):    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=4001                                 122629         124509        1635          0.0       61314.3       1.0X
+SUM segtree (default) W=4001                       3697           3755          53          0.5        1848.6      33.2X
+SUM segtree (blockSize=256) W=4001               108139         108330         165          0.0       54069.6       1.1X
+
+
+================================================================================================
+Section F - spill regression guard (String, stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+MAX String spill guard, W=1001, 1M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+MAX naive (String)                                        51489          51810         520          0.0       51489.5       1.0X
+MAX segtree default (String)                               3008           3017          10          0.3        3007.5      17.1X
+
+
+================================================================================================
+Section C - N-sweep small (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM N-sweep (segtree-only), W=1001, 2M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=2M                                     3256           3284          26          0.6        1628.2       1.0X
+
+
+================================================================================================
+Section C - N-sweep mid (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM N-sweep (segtree-only), W=1001, 8M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=8M                                    15089          15176         131          0.5        1886.1       1.0X
+
+
+================================================================================================
+Section C - N-sweep large (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1010-azure
+AMD EPYC 7763 64-Core Processor
+SUM N-sweep (segtree-only), W=1001, 16M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=16M                                    36197          36269          92          0.4        2262.3       1.0X
+
+
diff --git a/sql/core/benchmarks/WindowBenchmark-jdk25-results.txt b/sql/core/benchmarks/WindowBenchmark-jdk25-results.txt
new file mode 100644
index 0000000000000..50dcff4c884d0
--- /dev/null
+++ b/sql/core/benchmarks/WindowBenchmark-jdk25-results.txt
@@ -0,0 +1,172 @@
+================================================================================================
+Section A - MIN (non-invertible)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+MIN sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+MIN naive (current, baseline)                      3840           3857          10          0.1       14648.6       1.0X
+MIN segtree (default)                               421            441          14          0.6        1604.7       9.1X
+MIN segtree (blockSize=256)                        1951           1973          19          0.1        7444.1       2.0X
+
+
+================================================================================================
+Section A - MAX (non-invertible)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+MAX sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+MAX naive (current, baseline)                      5415           5537         158          0.0       20657.2       1.0X
+MAX segtree (default)                               414            422           9          0.6        1578.6      13.1X
+MAX segtree (blockSize=256)                        1959           1975          13          0.1        7471.7       2.8X
+
+
+================================================================================================
+Section A - SUM (Spark has no inverse; full recompute)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive (current, baseline)                      4135           4143           5          0.1       15775.4       1.0X
+SUM segtree (default)                               423            432          14          0.6        1614.2       9.8X
+SUM segtree (blockSize=256)                        1972           1991          16          0.1        7524.2       2.1X
+
+
+================================================================================================
+Section A - COUNT
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+COUNT sliding window, W=1001, 256K rows:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+COUNT naive (current, baseline)                    3569           3632         127          0.1       13615.0       1.0X
+COUNT segtree (default)                             394            402          10          0.7        1504.7       9.0X
+COUNT segtree (blockSize=256)                      1939           1961          17          0.1        7396.6       1.8X
+
+
+================================================================================================
+Section A - AVG (multi-buffer)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+AVG sliding window, W=1001, 192K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+AVG naive (current, baseline)                      4134           4147          11          0.0       21028.2       1.0X
+AVG segtree (default)                               413            433          12          0.5        2102.7      10.0X
+AVG segtree (blockSize=256)                        1347           1384          32          0.1        6852.3       3.1X
+
+
+================================================================================================
+Section A - STDDEV_SAMP (multi-buffer, stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+STDDEV_SAMP sliding window, W=1001, 2M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+STDDEV_SAMP naive (current, baseline)                        105327         105484         189          0.0       52663.3       1.0X
+STDDEV_SAMP segtree (default)                                  5650           5657          12          0.4        2825.1      18.6X
+STDDEV_SAMP segtree (blockSize=256)                           97218          97841         551          0.0       48609.2       1.1X
+
+
+================================================================================================
+Section B - W=10 scaling (stress: Pareto loss zone)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=11, 2M rows (stress):      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=11                                     1016           1023           6          2.0         507.9       1.0X
+SUM segtree (default) W=11                         2319           2336          15          0.9        1159.7       0.4X
+
+
+================================================================================================
+Section B - W=50 scaling (stress: Pareto loss zone)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=51, 2M rows (stress):      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=51                                     2208           2234          22          0.9        1103.8       1.0X
+SUM segtree (default) W=51                         2653           2661          13          0.8        1326.7       0.8X
+
+
+================================================================================================
+Section B - W=201 scaling
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=201, 1M rows:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=201                                    3314           3362          88          0.3        3313.9       1.0X
+SUM segtree (default) W=201                        1447           1475          21          0.7        1447.4       2.3X
+
+
+================================================================================================
+Section B - W=4001 scaling (stress, + bs=256 cross-block)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=4001, 2M rows (stress):    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=4001                                 119582         124613        6122          0.0       59791.1       1.0X
+SUM segtree (default) W=4001                       3828           3863          33          0.5        1914.2      31.2X
+SUM segtree (blockSize=256) W=4001                93752          93810          86          0.0       46875.9       1.3X
+
+
+================================================================================================
+Section F - spill regression guard (String, stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+MAX String spill guard, W=1001, 1M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+MAX naive (String)                                        55976          56088         126          0.0       55976.4       1.0X
+MAX segtree default (String)                               2822           2849          32          0.4        2821.9      19.8X
+
+
+================================================================================================
+Section C - N-sweep small (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM N-sweep (segtree-only), W=1001, 2M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=2M                                     3349           3387          34          0.6        1674.7       1.0X
+
+
+================================================================================================
+Section C - N-sweep mid (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM N-sweep (segtree-only), W=1001, 8M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=8M                                    15536          15568          56          0.5        1941.9       1.0X
+
+
+================================================================================================
+Section C - N-sweep large (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1010-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM N-sweep (segtree-only), W=1001, 16M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=16M                                    36692          36765          79          0.4        2293.3       1.0X
+
+
diff --git a/sql/core/benchmarks/WindowBenchmark-results.txt b/sql/core/benchmarks/WindowBenchmark-results.txt
new file mode 100644
index 0000000000000..bb50c20b59213
--- /dev/null
+++ b/sql/core/benchmarks/WindowBenchmark-results.txt
@@ -0,0 +1,172 @@
+================================================================================================
+Section A - MIN (non-invertible)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+MIN sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+MIN naive (current, baseline)                      4350           4380          30          0.1       16592.3       1.0X
+MIN segtree (default)                               433            442          10          0.6        1652.8      10.0X
+MIN segtree (blockSize=256)                        2259           2266           9          0.1        8617.9       1.9X
+
+
+================================================================================================
+Section A - MAX (non-invertible)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+MAX sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+MAX naive (current, baseline)                      4113           4124          21          0.1       15690.6       1.0X
+MAX segtree (default)                               415            420           3          0.6        1582.0       9.9X
+MAX segtree (blockSize=256)                        2279           2288           9          0.1        8692.0       1.8X
+
+
+================================================================================================
+Section A - SUM (Spark has no inverse; full recompute)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM sliding window, W=1001, 256K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive (current, baseline)                      4828           4870          61          0.1       18418.4       1.0X
+SUM segtree (default)                               431            436           6          0.6        1643.9      11.2X
+SUM segtree (blockSize=256)                        2286           2295          13          0.1        8720.2       2.1X
+
+
+================================================================================================
+Section A - COUNT
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+COUNT sliding window, W=1001, 256K rows:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+COUNT naive (current, baseline)                    3867           3883          13          0.1       14751.9       1.0X
+COUNT segtree (default)                             415            417           3          0.6        1582.8       9.3X
+COUNT segtree (blockSize=256)                      2216           2220           7          0.1        8453.8       1.7X
+
+
+================================================================================================
+Section A - AVG (multi-buffer)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+AVG sliding window, W=1001, 192K rows:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+AVG naive (current, baseline)                      4483           4495          14          0.0       22804.2       1.0X
+AVG segtree (default)                               426            435           8          0.5        2165.9      10.5X
+AVG segtree (blockSize=256)                        1551           1566          18          0.1        7890.5       2.9X
+
+
+================================================================================================
+Section A - STDDEV_SAMP (multi-buffer, stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+STDDEV_SAMP sliding window, W=1001, 2M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+STDDEV_SAMP naive (current, baseline)                         97349          97405          59          0.0       48674.4       1.0X
+STDDEV_SAMP segtree (default)                                  5628           5631           3          0.4        2813.9      17.3X
+STDDEV_SAMP segtree (blockSize=256)                          113685         113987         289          0.0       56842.3       0.9X
+
+
+================================================================================================
+Section B - W=10 scaling (stress: Pareto loss zone)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=11, 2M rows (stress):      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=11                                     1003           1012           8          2.0         501.6       1.0X
+SUM segtree (default) W=11                         2378           2384           9          0.8        1188.8       0.4X
+
+
+================================================================================================
+Section B - W=50 scaling (stress: Pareto loss zone)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=51, 2M rows (stress):      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=51                                     2377           2395          24          0.8        1188.5       1.0X
+SUM segtree (default) W=51                         2667           2683          17          0.7        1333.7       0.9X
+
+
+================================================================================================
+Section B - W=201 scaling
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=201, 1M rows:              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=201                                    3588           3620          39          0.3        3588.1       1.0X
+SUM segtree (default) W=201                        1465           1474          10          0.7        1465.1       2.4X
+
+
+================================================================================================
+Section B - W=4001 scaling (stress, + bs=256 cross-block)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM scaling, W=4001, 2M rows (stress):    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+SUM naive W=4001                                 129657         129727          67          0.0       64828.5       1.0X
+SUM segtree (default) W=4001                       3834           3858          24          0.5        1916.8      33.8X
+SUM segtree (blockSize=256) W=4001               110788         111483         748          0.0       55393.8       1.2X
+
+
+================================================================================================
+Section F - spill regression guard (String, stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+MAX String spill guard, W=1001, 1M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+MAX naive (String)                                        88459          88494          43          0.0       88459.0       1.0X
+MAX segtree default (String)                               3213           3227          21          0.3        3213.4      27.5X
+
+
+================================================================================================
+Section C - N-sweep small (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM N-sweep (segtree-only), W=1001, 2M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=2M                                     3314           3319           6          0.6        1657.1       1.0X
+
+
+================================================================================================
+Section C - N-sweep mid (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM N-sweep (segtree-only), W=1001, 8M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=8M                                    15970          16004          42          0.5        1996.3       1.0X
+
+
+================================================================================================
+Section C - N-sweep large (stress)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1011-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+SUM N-sweep (segtree-only), W=1001, 16M rows (stress):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------------------
+SUM segtree (default) N=16M                                    38877          38998         121          0.4        2429.8       1.0X
+
+
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index b788b277352c4..9bc4824b603a3 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/BufferedRowIterator.java b/sql/core/src/main/java/org/apache/spark/sql/execution/BufferedRowIterator.java
index 3d0511b7ba838..52357acf3c7d6 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/BufferedRowIterator.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/BufferedRowIterator.java
@@ -33,7 +33,8 @@
 public abstract class BufferedRowIterator {
   protected LinkedList<InternalRow> currentRows = new LinkedList<>();
   // used when there is no column in output
-  protected UnsafeRow unsafeRow = new UnsafeRow(0);
+  // Keep it public for codegen to access.
+  public UnsafeRow unsafeRow = new UnsafeRow(0);
   private long startTimeNs = System.nanoTime();
 
   protected int partitionIndex = -1;
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java
index 1c09cc9f7ff26..ea33aa1e23254 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java
@@ -182,7 +182,8 @@ public void initBatch(
       DataType dt = requiredFields[i].dataType();
       if (requestedPartitionColIds[i] != -1) {
         ConstantColumnVector partitionCol = new ConstantColumnVector(capacity, dt);
-        ColumnVectorUtils.populate(partitionCol, partitionValues, requestedPartitionColIds[i]);
+        ColumnVectorUtils.populate(
+            partitionCol, partitionValues, requestedPartitionColIds[i], memoryMode);
         orcVectorWrappers[i] = partitionCol;
       } else {
         int colId = requestedDataColIds[i];
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java
index 0e2c997e553f2..913ffaa1d3992 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java
@@ -712,7 +712,9 @@ public void decodeSingleDictionaryId(
         WritableColumnVector values,
         WritableColumnVector dictionaryIds,
         Dictionary dictionary) {
-      values.putLong(offset, dictionary.decodeToLong(dictionaryIds.getDictId(offset)));
+      // 32-bit Decimal target (precision <= 9) is stored in `intData`; `longData` is
+      // unallocated, so use `putInt` with the same narrowing cast as `readValue`/`readValues`.
+      values.putInt(offset, (int) dictionary.decodeToLong(dictionaryIds.getDictId(offset)));
     }
   }
 
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java
index 72125701fd49e..5e782433f5576 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java
@@ -303,7 +303,7 @@ private void initBatch(
       int partitionIdx = sparkSchema.fields().length;
       for (int i = 0; i < partitionColumns.fields().length; i++) {
         ColumnVectorUtils.populate(
-          (ConstantColumnVector) vectors[i + partitionIdx], partitionValues, i);
+          (ConstantColumnVector) vectors[i + partitionIdx], partitionValues, i, MEMORY_MODE);
       }
     }
 
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/WKBConverterStrategy.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/WKBConverterStrategy.java
index a8be909512894..dc821f6c57103 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/WKBConverterStrategy.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/WKBConverterStrategy.java
@@ -46,6 +46,6 @@ enum WKBToGeographyConverter implements WKBConverterStrategy {
 
   @Override
   public byte[] convert(byte[] wkb, int srid) {
-    return STUtils.stGeogFromWKB(wkb).getBytes();
+    return STUtils.stGeogFromWKB(wkb, srid).getBytes();
   }
 }
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
index 9ff385c995ff5..1ca9290e3b7c2 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
@@ -31,8 +31,10 @@
 import org.apache.spark.memory.MemoryMode;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
 import org.apache.spark.sql.catalyst.types.*;
 import org.apache.spark.sql.catalyst.util.DateTimeUtils;
+import org.apache.spark.sql.execution.RowToColumnConverter;
 import org.apache.spark.sql.types.*;
 import org.apache.spark.sql.vectorized.ColumnarArray;
 import org.apache.spark.sql.vectorized.ColumnarBatch;
@@ -49,9 +51,22 @@
 public class ColumnVectorUtils {
 
   /**
-   * Populates the value of `row[fieldIdx]` into `ConstantColumnVector`.
+   * Populates the value of `row[fieldIdx]` into `ConstantColumnVector`. For complex types
+   * (array / map) this allocates a small backing `WritableColumnVector` on-heap by default. Use
+   * the {@link #populate(ConstantColumnVector, InternalRow, int, MemoryMode)} overload to control
+   * the backing memory mode.
    */
   public static void populate(ConstantColumnVector col, InternalRow row, int fieldIdx) {
+    populate(col, row, fieldIdx, MemoryMode.ON_HEAP);
+  }
+
+  /**
+   * Populates the value of `row[fieldIdx]` into `ConstantColumnVector`. For array / map values,
+   * `memMode` selects on-heap vs off-heap allocation for the backing `WritableColumnVector` that
+   * holds the constant element data; it has no effect on primitive types.
+   */
+  public static void populate(
+      ConstantColumnVector col, InternalRow row, int fieldIdx, MemoryMode memMode) {
     DataType t = col.dataType();
     PhysicalDataType pdt = PhysicalDataType.apply(t);
 
@@ -93,6 +108,34 @@ public static void populate(ConstantColumnVector col, InternalRow row, int field
         col.setCalendarInterval((CalendarInterval) row.get(fieldIdx, t));
       } else if (pdt instanceof PhysicalVariantType) {
         col.setVariant((VariantVal)row.get(fieldIdx, t));
+      } else if (pdt instanceof PhysicalStructType) {
+        StructType st = (StructType) t;
+        InternalRow inner = row.getStruct(fieldIdx, st.fields().length);
+        InternalRow tmpRow = new GenericInternalRow(1);
+        for (int i = 0; i < st.fields().length; i++) {
+          StructField field = st.fields()[i];
+          tmpRow.update(0, inner.isNullAt(i) ? null : inner.get(i, field.dataType()));
+          // ConstantColumnVector's constructor pre-allocates struct children, so the recursive
+          // populate call below has a target vector to write into.
+          populate((ConstantColumnVector) col.getChild(i), tmpRow, 0, memMode);
+        }
+      } else if (pdt instanceof PhysicalArrayType || pdt instanceof PhysicalMapType) {
+        // Allocate a 1-row backing vector (on-heap or off-heap per `memMode`) to hold the
+        // constant complex value.
+        WritableColumnVector backing = memMode == MemoryMode.OFF_HEAP
+            ? new OffHeapColumnVector(1, t)
+            : new OnHeapColumnVector(1, t);
+        // Reuse RowToColumnConverter by wrapping `t` as a single-field struct schema and
+        // converting the one-row input. This recursively handles all element types correctly.
+        StructType wrapperSchema = new StructType().add("v", t, true);
+        RowToColumnConverter converter = new RowToColumnConverter(wrapperSchema);
+        InternalRow wrapped = new GenericInternalRow(new Object[]{row.get(fieldIdx, t)});
+        converter.convert(wrapped, new WritableColumnVector[]{backing});
+        if (pdt instanceof PhysicalArrayType) {
+          col.setArrayWithBacking(backing.getArray(0), backing);
+        } else {
+          col.setMapWithBacking(backing.getMap(0), backing);
+        }
       } else {
         throw new RuntimeException(String.format("DataType %s is not supported" +
             " in column vectorized reader.", t.sql()));
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java
index cd2a821698853..094d6edb6d259 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ConstantColumnVector.java
@@ -49,6 +49,8 @@ public class ConstantColumnVector extends ColumnVector {
   private ConstantColumnVector[] childData;
   private ColumnarArray arrayData;
   private ColumnarMap mapData;
+  // Optionally owned backing storage for arrayData / mapData. Closed by close().
+  private WritableColumnVector ownedBacking;
 
   private final int numRows;
 
@@ -62,6 +64,9 @@ public ConstantColumnVector(int numRows, DataType type) {
 
     if (type instanceof StructType structType) {
       this.childData = new ConstantColumnVector[structType.fields().length];
+      for (int i = 0; i < structType.fields().length; i++) {
+        this.childData[i] = new ConstantColumnVector(1, structType.fields()[i].dataType());
+      }
     } else if (type instanceof CalendarIntervalType) {
       // Three columns. Months as int. Days as Int. Microseconds as Long.
       this.childData = new ConstantColumnVector[3];
@@ -97,6 +102,10 @@ public void close() {
     }
     arrayData = null;
     mapData = null;
+    if (ownedBacking != null) {
+      ownedBacking.close();
+      ownedBacking = null;
+    }
   }
 
   @Override
@@ -218,24 +227,51 @@ public ColumnarArray getArray(int rowId) {
   }
 
   /**
-   * Sets the `ColumnarArray` `value` for all rows
+   * Sets the `ColumnarArray` `value` for all rows. The caller retains ownership of the backing
+   * storage for `value`; use `setArrayWithBacking` if this vector should own and close it.
    */
   public void setArray(ColumnarArray value) {
     arrayData = value;
   }
 
+  /**
+   * Sets the `ColumnarArray` `value` for all rows and takes ownership of `backing`, which will be
+   * closed when this vector is closed.
+   */
+  public void setArrayWithBacking(ColumnarArray value, WritableColumnVector backing) {
+    arrayData = value;
+    replaceOwnedBacking(backing);
+  }
+
   @Override
   public ColumnarMap getMap(int ordinal) {
     return mapData;
   }
 
   /**
-   * Sets the `ColumnarMap` `value` for all rows
+   * Sets the `ColumnarMap` `value` for all rows. The caller retains ownership of the backing
+   * storage for `value`; use `setMapWithBacking` if this vector should own and close it.
    */
   public void setMap(ColumnarMap value) {
     mapData = value;
   }
 
+  /**
+   * Sets the `ColumnarMap` `value` for all rows and takes ownership of `backing`, which will be
+   * closed when this vector is closed.
+   */
+  public void setMapWithBacking(ColumnarMap value, WritableColumnVector backing) {
+    mapData = value;
+    replaceOwnedBacking(backing);
+  }
+
+  private void replaceOwnedBacking(WritableColumnVector backing) {
+    if (ownedBacking != null && ownedBacking != backing) {
+      ownedBacking.close();
+    }
+    ownedBacking = backing;
+  }
+
   @Override
   public Decimal getDecimal(int rowId, int precision, int scale) {
     // copy and modify from WritableColumnVector
@@ -303,9 +339,14 @@ public ColumnVector getChild(int ordinal) {
   }
 
   /**
-   * Sets the child `ConstantColumnVector` `value` at the given ordinal for all rows
+   * Sets the child `ConstantColumnVector` `value` at the given ordinal for all rows. Closes any
+   * previously-set child at this ordinal (e.g., one auto-allocated by the constructor) to avoid
+   * leaking its backing storage.
    */
   public void setChild(int ordinal, ConstantColumnVector value) {
+    if (childData[ordinal] != null && childData[ordinal] != value) {
+      childData[ordinal].close();
+    }
     childData[ordinal] = value;
   }
 
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
index a46b5143eef6d..96083b78be521 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java
@@ -29,8 +29,7 @@
 import org.apache.spark.sql.vectorized.ColumnarMap;
 import org.apache.spark.sql.vectorized.ColumnarRow;
 import org.apache.spark.unsafe.types.CalendarInterval;
-import org.apache.spark.unsafe.types.GeographyVal;
-import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.UTF8String;
 import org.apache.spark.unsafe.types.VariantVal;
 
@@ -78,10 +77,8 @@ public InternalRow copy() {
           row.update(i, getUTF8String(i).copy());
         } else if (dt instanceof BinaryType) {
           row.update(i, getBinary(i));
-        } else if (dt instanceof GeographyType) {
-          row.update(i, getGeography(i));
-        } else if (dt instanceof GeometryType) {
-          row.update(i, getGeometry(i));
+        } else if (dt instanceof GeographyType || dt instanceof GeometryType) {
+          row.update(i, getBinaryView(i).copy());
         } else if (dt instanceof DecimalType t) {
           row.setDecimal(i, getDecimal(i, t.precision(), t.scale()), t.precision());
         } else if (dt instanceof DateType) {
@@ -151,13 +148,8 @@ public byte[] getBinary(int ordinal) {
   }
 
   @Override
-  public GeographyVal getGeography(int ordinal) {
-    return columns[ordinal].getGeography(rowId);
-  }
-
-  @Override
-  public GeometryVal getGeometry(int ordinal) {
-    return columns[ordinal].getGeometry(rowId);
+  public BinaryView getBinaryView(int ordinal) {
+    return columns[ordinal].getBinaryView(rowId);
   }
 
   @Override
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
index ba740602b4c2b..b5b6e2b2b319a 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java
@@ -23,6 +23,7 @@
 
 import org.apache.spark.sql.types.*;
 import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
@@ -248,6 +249,11 @@ protected UTF8String getBytesAsUTF8String(int rowId, int count) {
     return UTF8String.fromAddress(null, data + rowId, count);
   }
 
+  @Override
+  protected BinaryView getBytesAsBinaryView(int rowId, int count) {
+    return BinaryView.fromAddress(null, data + rowId, count);
+  }
+
   @Override
   public ByteBuffer getByteBuffer(int rowId, int count) {
     return ByteBuffer.wrap(getBytes(rowId, count));
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
index 3e1f4d7a4f838..a57e3575b089f 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
@@ -22,6 +22,7 @@
 
 import org.apache.spark.sql.types.*;
 import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
@@ -236,6 +237,11 @@ protected UTF8String getBytesAsUTF8String(int rowId, int count) {
     return UTF8String.fromBytes(byteData, rowId, count);
   }
 
+  @Override
+  protected BinaryView getBytesAsBinaryView(int rowId, int count) {
+    return BinaryView.fromBytes(byteData, rowId, count);
+  }
+
   @Override
   public ByteBuffer getByteBuffer(int rowId, int count) {
     return ByteBuffer.wrap(byteData, rowId, count);
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java
index 0f5b23ad85390..af27594440835 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java
@@ -33,6 +33,7 @@
 import org.apache.spark.sql.vectorized.ColumnarMap;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.types.CalendarInterval;
+import org.apache.spark.unsafe.types.BinaryView;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
@@ -521,6 +522,24 @@ public byte[] getBinary(int rowId) {
     }
   }
 
+  @Override
+  public BinaryView getBinaryView(int rowId) {
+    if (isNullAt(rowId)) return null;
+    if (dictionary == null) {
+      return arrayData().getBytesAsBinaryView(getArrayOffset(rowId), getArrayLength(rowId));
+    } else {
+      byte[] bytes = dictionary.decodeToBinary(dictionaryIds.getDictId(rowId));
+      return BinaryView.fromBytes(bytes);
+    }
+  }
+
+  /**
+   * Gets the values of bytes from [rowId, rowId + count), as a BinaryView.
+   * This method is similar to {@link ColumnVector#getBytes(int, int)}, but can save data copy as
+   * BinaryView is used as a pointer.
+   */
+  protected abstract BinaryView getBytesAsBinaryView(int rowId, int count);
+
   /**
    * Gets the values of bytes from [rowId, rowId + count), as a ByteBuffer.
    * This method is similar to {@link ColumnVector#getBytes(int, int)}, but avoids making a copy.
diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/allexecutionspage.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/allexecutionspage.js
index 34e3be4913ce4..ed4561c732242 100644
--- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/allexecutionspage.js
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/allexecutionspage.js
@@ -15,109 +15,20 @@
  * limitations under the License.
  */
 
-/* global $, uiRoot, appBasePath */
-/* eslint-disable no-unused-vars */
-
-function formatDurationSql(milliseconds) {
-  if (milliseconds < 100) return parseInt(milliseconds).toFixed(1) + " ms";
-  var seconds = milliseconds / 1000;
-  if (seconds < 1) return seconds.toFixed(1) + " s";
-  if (seconds < 60) return seconds.toFixed(0) + " s";
-  var minutes = seconds / 60;
-  if (minutes < 10) return minutes.toFixed(1) + " min";
-  if (minutes < 60) return minutes.toFixed(0) + " min";
-  var hours = minutes / 60;
-  return hours.toFixed(1) + " h";
-}
-
-function formatDateSql(dateStr) {
-  if (!dateStr) return "";
-  try {
-    var dt = new Date(dateStr.replace("GMT", "Z"));
-    if (isNaN(dt.getTime())) return dateStr;
-    var pad = function(n) { return n < 10 ? "0" + n : n; };
-    return dt.getFullYear() + "-" + pad(dt.getMonth() + 1) + "-" + pad(dt.getDate()) + " " +
-      pad(dt.getHours()) + ":" + pad(dt.getMinutes()) + ":" + pad(dt.getSeconds());
-  } catch (e) { return dateStr; }
-}
-function escapeHtml(str) {
-  if (!str) return str;
-  var div = document.createElement("div");
-  div.appendChild(document.createTextNode(str));
-  return div.innerHTML;
-}
-/* eslint-enable no-unused-vars */
-
-function createSQLTableEndPoint(appId) {
-  var words = document.baseURI.split("/");
-  var ind = words.indexOf("proxy");
-  var newBaseURI;
-  if (ind > 0) {
-    appId = words[ind + 1];
-    newBaseURI = words.slice(0, ind + 2).join("/");
-    return newBaseURI + "/api/v1/applications/" + appId + "/sql/sqlTable";
-  }
-  ind = words.indexOf("history");
-  if (ind > 0) {
-    appId = words[ind + 1];
-    var attemptId = words[ind + 2];
-    newBaseURI = words.slice(0, ind).join("/");
-    if (isNaN(attemptId)) {
-      return newBaseURI + "/api/v1/applications/" + appId + "/sql/sqlTable";
-    } else {
-      return newBaseURI + "/api/v1/applications/" + appId + "/" +
-        attemptId + "/sql/sqlTable";
-    }
-  }
-  return uiRoot + "/api/v1/applications/" + appId + "/sql/sqlTable";
-}
-
-function statusBadge(status) {
-  var cls = "bg-secondary";
-  if (status === "COMPLETED") cls = "bg-success";
-  else if (status === "RUNNING") cls = "bg-primary";
-  else if (status === "FAILED") cls = "bg-danger";
-  return '<span class="badge ' + cls + '">' + status + '</span>';
-}
-
-function jobIdLinks(ids) {
-  if (!ids || ids.length === 0) return "";
-  var basePath = uiRoot + appBasePath;
-  return ids.map(function (id) {
-    return '<a href="' + basePath + '/jobs/job/?id=' + id + '">' + id + '</a>';
-  }).join(", ");
-}
-
-function descriptionHtml(exec) {
-  var desc = exec.description || "";
-  var basePath = uiRoot + appBasePath;
-  var url = basePath + "/SQL/execution/?id=" + exec.id;
-  if (desc.length > 100) {
-    var short = escapeHtml(desc.substring(0, 100)) + "...";
-    return '<a href="' + url + '" title="' + escapeHtml(desc) + '">' +
-      short + '</a>';
-  }
-  return '<a href="' + url + '">' + (escapeHtml(desc) || exec.id) + '</a>';
-}
-
-// Remove client-side filter — status filtering is now server-side
+/* global $, uiRoot, appBasePath, createSqlApiBase, getSqlTableColumns,
+   withResolvedAppId, statusBadge, jobIdLinks, formatDurationSql,
+   descriptionHtml */
 
 $(document).ready(function () {
-  // Resolve appId: check proxy/history in URL, fallback to REST API
-  var words = document.baseURI.split("/");
-  var appId = "";
-  var ind = words.indexOf("proxy");
-  if (ind > 0) {
-    appId = words[ind + 1];
-  } else {
-    ind = words.indexOf("history");
-    if (ind > 0) {
-      appId = words[ind + 1];
-    }
+  // Read the cluster-level grouping toggle rendered into the page by Scala
+  var groupSubExecEnabled = true;
+  var configEl = document.getElementById("group-sub-exec-config");
+  if (configEl) {
+    groupSubExecEnabled = configEl.getAttribute("data-value") === "true";
   }
 
   function init(resolvedAppId) {
-    var sqlTableEndPoint = createSQLTableEndPoint(resolvedAppId);
+    var sqlTableEndPoint = createSqlApiBase(resolvedAppId) + "/sqlTable";
 
     var container = document.getElementById("sql-executions-table");
     container.innerHTML =
@@ -131,6 +42,27 @@ $(document).ready(function () {
       '<table id="sql-table" class="table table-striped compact cell-border" ' +
       'style="width:100%"></table>';
 
+    var columns = getSqlTableColumns({ detail: false });
+    if (groupSubExecEnabled) {
+      // Trailing "Sub Executions" column matching the SPARK-41752 / 4.1 layout:
+      // shows "+N sub" when the root has children, blank otherwise. Click to
+      // expand a child row containing the sub-execution rows.
+      columns.push({
+        data: null, name: "subExecutions", title: "Sub Executions",
+        orderable: false, searchable: false,
+        className: "sub-exec-toggle",
+        render: function (data, type, row) {
+          if (type !== "display") return "";
+          var subs = row.subExecutions || [];
+          if (subs.length === 0) return "";
+          var childId = "sub-exec-" + row.id;
+          return '<a href="#" class="toggle-sub-exec" role="button" ' +
+            'aria-expanded="false" aria-controls="' + childId + '">' +
+            '+' + subs.length + ' sub</a>';
+        }
+      });
+    }
+
     var table = $("#sql-table").DataTable({
       serverSide: true,
       processing: true,
@@ -146,97 +78,88 @@ $(document).ready(function () {
           if (sel) {
             d.status = sel;
           }
+          d.groupSubExecution = groupSubExecEnabled ? "true" : "false";
         },
         dataSrc: function (json) { return json.aaData; },
         error: function () {
           $("#sql-table_processing").css("display", "none");
         }
       },
-      columns: [
-        {
-          data: "id", name: "id", title: "ID",
-          render: function (data, type) {
-            if (type !== "display") return data;
-            var basePath = uiRoot + appBasePath;
-            return '<a href="' + basePath + '/SQL/execution/?id=' + data + '">' +
-              data + '</a>';
-          }
-        },
-        {
-          data: "queryId", name: "queryId", title: "Query ID",
-          orderable: false,
-          render: function (data, type) {
-            if (type !== "display" || !data) return data || "";
-            return '<span title="' + data + '">' + data.substring(0, 8) + '...</span>';
-          }
-        },
-        {
-          data: "status", name: "status", title: "Status",
-          render: function (data, type) {
-            if (type !== "display") return data;
-            return statusBadge(data);
-          }
-        },
-        {
-          data: "description", name: "description", title: "Description",
-          render: function (data, type, row) {
-            if (type !== "display") return data || "";
-            return descriptionHtml({ id: row.id, description: data });
-          }
-        },
-        {
-          data: "submissionTime", name: "submissionTime", title: "Submitted",
-          render: function (data, type) {
-            if (type !== "display") return data;
-            return formatDateSql(data);
-          }
-        },
-        {
-          data: "duration", name: "duration", title: "Duration",
-          render: function (data, type) {
-            if (type !== "display") return data;
-            return formatDurationSql(data);
-          }
-        },
-        {
-          data: "jobIds", name: "jobIds", title: "Succeeded Jobs",
-          orderable: false,
-          render: function (data, type) {
-            if (type !== "display") return (data || []).join(",");
-            return jobIdLinks(data || []);
-          }
-        },
-        {
-          data: "errorMessage", name: "errorMessage", title: "Error Message",
-          orderable: false,
-          render: function (data, type) {
-            if (type !== "display" || !data) return data || "";
-            if (data.length > 100) {
-              return '<span title="' + escapeHtml(data) + '">' +
-                escapeHtml(data.substring(0, 100)) + '...</span>';
-            }
-            return escapeHtml(data);
-          }
-        }
-      ],
+      columns: columns,
       order: [[0, "desc"]],
       language: { search: "Search:&#160;" }
     });
 
+    // Child-row expansion for sub-executions. Sub data is embedded per root row
+    // in the server payload (`row.subExecutions`), so no second fetch is needed.
+    // Under serverSide: true DataTables destroys/recreates rows on every sort,
+    // filter or page change, so we track expanded row IDs out-of-band and
+    // re-attach the child on each draw.
+    if (groupSubExecEnabled) {
+      var expandedRowIds = {};
+
+      var renderSubExecutionsHtml = function (rowData) {
+        var subs = (rowData && rowData.subExecutions) || [];
+        var basePath = uiRoot + appBasePath;
+        var childId = "sub-exec-" + (rowData && rowData.id);
+        var html = '<table id="' + childId +
+          '" class="table table-sm table-bordered mb-0 sub-exec-table">';
+        html += '<thead><tr><th>ID</th><th>Status</th><th>Description</th>' +
+          '<th>Duration</th><th>Succeeded Jobs</th></tr></thead><tbody>';
+        subs.forEach(function (child) {
+          html += '<tr><td><a href="' + basePath + '/SQL/execution/?id=' +
+            child.id + '">' + child.id + '</a></td>';
+          html += '<td>' + statusBadge(child.status) + '</td>';
+          html += '<td>' + descriptionHtml({
+            id: child.id, description: child.description || ""
+          }) + '</td>';
+          html += '<td>' + formatDurationSql(child.duration) + '</td>';
+          html += '<td>' + jobIdLinks(child.jobIds || []) + '</td></tr>';
+        });
+        html += '</tbody></table>';
+        return html;
+      };
+
+      $("#sql-table tbody").on("click", "a.toggle-sub-exec", function (e) {
+        e.preventDefault();
+        var tr = $(this).closest("tr");
+        var dtRow = table.row(tr);
+        var rowData = dtRow.data();
+        var subs = (rowData && rowData.subExecutions) || [];
+        if (dtRow.child.isShown()) {
+          dtRow.child.hide();
+          tr.removeClass("shown");
+          $(this).text("+" + subs.length + " sub").attr("aria-expanded", "false");
+          delete expandedRowIds[rowData.id];
+        } else {
+          dtRow.child(renderSubExecutionsHtml(rowData)).show();
+          tr.addClass("shown");
+          $(this).text("\u2212" + subs.length + " sub").attr("aria-expanded", "true");
+          expandedRowIds[rowData.id] = true;
+        }
+      });
+
+      table.on("draw", function () {
+        $("#sql-table tbody > tr").each(function () {
+          var dtRow = table.row(this);
+          var data = dtRow.data();
+          if (data && expandedRowIds[data.id]) {
+            var subs = data.subExecutions || [];
+            dtRow.child(renderSubExecutionsHtml(data)).show();
+            $(this).addClass("shown");
+            $(this).find("a.toggle-sub-exec")
+              .text("\u2212" + subs.length + " sub")
+              .attr("aria-expanded", "true");
+          }
+        });
+      });
+    }
+
     $("#status-filter").on("change", function () {
       table.draw();
     });
 
   } // end init
 
-  if (appId) {
-    init(appId);
-  } else {
-    // Standalone mode: fetch appId from REST API
-    $.getJSON(uiRoot + "/api/v1/applications", function (response) {
-      if (response && response.length > 0) {
-        init(response[0].id);
-      }
-    });
-  }
+  withResolvedAppId(init);
 });
diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/executionpage.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/executionpage.js
new file mode 100644
index 0000000000000..4cb9a65c05100
--- /dev/null
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/executionpage.js
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* global $, createSqlApiBase, getSqlTableColumns, withResolvedAppId */
+
+// Renders the single-row summary table at the top of the SQL execution detail
+// page using the same column layout as the SQL listing page. Fetches the
+// execution data from the v1 API and feeds it into a one-row DataTable.
+$(document).ready(function () {
+  var tableEl = document.getElementById("sql-execution-table");
+  if (!tableEl) return;
+
+  var executionId = tableEl.getAttribute("data-execution-id");
+  if (!executionId) return;
+
+  function init(resolvedAppId) {
+    var endpoint = createSqlApiBase(resolvedAppId) + "/" + executionId +
+      "?details=false&planDescription=false";
+    $.getJSON(endpoint, function (data) {
+      // ExecutionData fields: id, status, description, submissionTime, duration,
+      //   runningJobIds, successJobIds, failedJobIds, queryId, errorMessage,
+      //   rootExecutionId. Map to the row shape consumed by getSqlTableColumns:
+      //   id, queryId, status, description, submissionTime, duration, jobIds,
+      //   errorMessage.
+      var row = {
+        id: data.id,
+        queryId: data.queryId || "",
+        status: data.status,
+        description: data.description || "",
+        submissionTime: data.submissionTime,
+        duration: data.duration,
+        jobIds: data.successJobIds || [],
+        errorMessage: data.errorMessage || ""
+      };
+
+      $("#sql-execution-table").DataTable({
+        data: [row],
+        columns: getSqlTableColumns({ detail: true }),
+        paging: false,
+        searching: false,
+        info: false,
+        ordering: false,
+        dom: "t"
+      });
+    }).fail(function () {
+      $("#sql-execution-table").replaceWith(
+        '<div class="alert alert-warning">' +
+        'Failed to load execution metadata.</div>');
+    });
+  }
+
+  withResolvedAppId(init);
+});
diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css
index 0659d0c80de0a..738e70fdd1279 100644
--- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css
@@ -23,6 +23,7 @@
   --spark-sql-selected-fill: #E25A1CFF;
   --spark-sql-selected-stroke: #317EACFF;
   --spark-sql-linked-fill: #FFC106FF;
+  --spark-sql-search-stroke: #FD7E14;
 }
 [data-bs-theme="dark"] {
   --spark-sql-cluster-fill: #1a5276;
@@ -32,6 +33,7 @@
   --spark-sql-selected-fill: #c0470fff;
   --spark-sql-selected-stroke: #5dade2ff;
   --spark-sql-linked-fill: #d4a00aff;
+  --spark-sql-search-stroke: #ffa94d;
 }
 
 svg g.label {
@@ -138,5 +140,106 @@ svg path.linked {
 }
 
 #plan-viz-graph {
-  overflow-x: auto;
+  position: relative;
+}
+
+#plan-viz-graph svg {
+  width: 100%;
+  height: 70vh;
+  max-height: 70vh;
+  display: block;
+  cursor: grab;
+  background-color: var(--bs-body-bg);
+  user-select: none;
+}
+
+#plan-viz-graph svg.grabbing {
+  cursor: grabbing;
+}
+
+/* Allow text selection inside HTML labels (detailed mode metrics tables) */
+#plan-viz-graph svg foreignObject,
+#plan-viz-graph svg foreignObject * {
+  user-select: text;
+}
+
+.plan-viz-zoom-toolbar {
+  position: absolute;
+  top: 8px;
+  right: 16px;
+  z-index: 10;
+  display: flex;
+  align-items: center;
+}
+
+.plan-viz-zoom-toolbar #plan-viz-zoom-level {
+  display: inline-block;
+  min-width: 3.25rem;
+  font-variant-numeric: tabular-nums;
+}
+
+#plan-viz-search-expanded {
+  width: auto;
+}
+
+#plan-viz-search-input {
+  width: 12rem;
+}
+
+#plan-viz-search-count {
+  min-width: 4.5rem;
+  justify-content: center;
+  font-variant-numeric: tabular-nums;
+  font-size: 0.75rem;
+}
+
+#plan-viz-search-count.no-match {
+  color: var(--bs-danger);
+}
+
+/* Search result highlight: outline matched node/cluster with the search color. */
+svg g.node rect.search-match,
+svg g.cluster.search-match > rect {
+  stroke: var(--spark-sql-search-stroke);
+  stroke-width: 3px;
+}
+
+/* Currently active match: keep the outline visible and add an accent fill. */
+svg g.node rect.search-match.search-active {
+  fill: var(--spark-sql-linked-fill);
+}
+
+/* Dim non-matching nodes/clusters when a search is active.
+   `!important` is needed because dagre-d3 writes an inline `opacity: 1`
+   style on each <g class="node">/<g class="cluster"> at render time. */
+svg g.node.search-dimmed,
+svg g.cluster.search-dimmed {
+  opacity: 0.3 !important;
+}
+
+/* Inline <pre> renderer used by the shared SQL DataTable column factory
+ * (sql-table-utils.js) on the execution detail page. Keeps long SQL/error
+ * text legible without stretching the row. */
+.sql-cell-pre {
+  font-size: 0.85rem;
+  max-height: 300px;
+  overflow: auto;
+  white-space: pre-wrap;
+  word-break: break-word;
+  margin: 0;
+  padding: 0.25rem 0.5rem;
+  background-color: transparent;
+}
+
+/* Collapsible disclosure used for long Description / Error Message cells.
+ * Uses the native <details>/<summary> element so no JavaScript is needed. */
+.sql-cell-details > summary {
+  cursor: pointer;
+  font-family: var(--bs-font-monospace);
+  font-size: 0.85rem;
+  list-style: revert;
+  user-select: none;
+}
+.sql-cell-details[open] > summary {
+  margin-bottom: 0.25rem;
 }
diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
index 51fd4ce963116..81d4562cbb35d 100644
--- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js
@@ -19,13 +19,25 @@
 
 var PlanVizConstants = {
   svgMarginX: 16,
-  svgMarginY: 16
+  svgMarginY: 16,
+  zoomMin: 0.1,
+  zoomMax: 16,
+  zoomStep: 1.25
 };
 
 // Track selected node for re-rendering the detail panel on checkbox toggle
 var selectedNodeId = null;
 var cachedNodeDetails = null;
 
+// d3.zoom behavior for the current SVG; reinitialized on each (re)render.
+var planVizZoom = null;
+
+// Current dagre graph; kept so node search can re-iterate names without re-parsing dot.
+var currentPlanGraph = null;
+
+// Node search state. matches[] is in DOM (top-down) order, index is the active match.
+var planVizSearchState = { query: "", matches: [], index: -1 };
+
 function shouldRenderPlanViz() {
   return planVizContainer().selectAll("svg").empty();
 }
@@ -33,13 +45,15 @@ function shouldRenderPlanViz() {
 function renderPlanViz() {
   var svg = planVizContainer()
     .append("svg")
-    .attr("width", window.innerWidth || 1920)
-    .attr("height", 1000);
+    .attr("width", "100%")
+    .attr("height", "70vh");
   var metadata = d3.select("#plan-viz-metadata");
   var dot = metadata.select(".dot-file").text().trim();
-  var graph = svg.append("g");
+  var zoomLayer = svg.append("g").attr("class", "zoom-layer");
+  var graph = zoomLayer.append("g");
 
   var g = graphlibDot.read(dot);
+  currentPlanGraph = g;
   preprocessGraphLayout(g);
   var renderer = new dagreD3.render();
   renderer(graph, g);
@@ -56,6 +70,8 @@ function renderPlanViz() {
   resizeSvg(svg);
   postprocessForAdditionalMetrics();
   setupDetailedLabelsToggle();
+  setupZoomAndPan(svg, zoomLayer);
+  reapplyPlanVizSearch();
 }
 
 /* -------------------- *
@@ -167,7 +183,9 @@ function preprocessGraphLayout(g) {
 }
 
 /*
- * Helper function to size the SVG appropriately such that all elements are displayed.
+ * Helper function to compute the SVG viewBox so that all elements fit.
+ * The SVG element itself uses CSS sizing (width: 100%, height: 70vh),
+ * so we only set the viewBox here; pan/zoom is applied to the inner zoom-layer.
  * This assumes that all outermost elements are clusters (rectangles).
  */
 function resizeSvg(svg) {
@@ -192,9 +210,7 @@ function resizeSvg(svg) {
     }));
   var width = endX - startX;
   var height = endY - startY;
-  svg.attr("viewBox", startX + " " + startY + " " + width + " " + height)
-     .attr("width", width)
-     .attr("height", height);
+  svg.attr("viewBox", startX + " " + startY + " " + width + " " + height);
 }
 
 /* Helper function to convert attributes to numeric values. */
@@ -596,6 +612,16 @@ document.getElementById("plan-viz-download-btn").addEventListener("click", async
       console.error("Failed to fetch CSS for SVG download", e);
     }
     d3.select(svg).insert("style", ":first-child").text(css);
+    // Reset any pan/zoom transform on the cloned SVG so the exported file
+    // shows the natural plan, independent of the user's current view state.
+    d3.select(svg).select("g.zoom-layer").attr("transform", null);
+    // Make the standalone SVG self-sized using the viewBox dimensions so
+    // external viewers render at the natural plan size.
+    const viewBox = (svg.getAttribute("viewBox") || "").split(/\s+/).map(parseFloat);
+    if (viewBox.length === 4 && viewBox.every((v) => !isNaN(v))) {
+      svg.setAttribute("width", String(viewBox[2]));
+      svg.setAttribute("height", String(viewBox[3]));
+    }
     const svgData = new XMLSerializer().serializeToString(svg);
     blob = new Blob([svgData], { type: "image/svg+xml" });
   } else if (format === "dot") {
@@ -655,11 +681,13 @@ function rerenderWithDetailedLabels() {
 
   var svg = container
     .append("svg")
-    .attr("width", window.innerWidth || 1920)
-    .attr("height", 1000);
-  var graph = svg.append("g");
+    .attr("width", "100%")
+    .attr("height", "70vh");
+  var zoomLayer = svg.append("g").attr("class", "zoom-layer");
+  var graph = zoomLayer.append("g");
 
   var g = graphlibDot.read(dot);
+  currentPlanGraph = g;
 
   // If detailed mode, inject HTML labels with metrics tables
   var detailed = document.getElementById("detailed-labels-checkbox");
@@ -694,12 +722,364 @@ function rerenderWithDetailedLabels() {
   setupTooltipForSparkPlanNode(g);
   resizeSvg(svg);
   postprocessForAdditionalMetrics();
+  setupZoomAndPan(svg, zoomLayer);
+  reapplyPlanVizSearch();
+}
+
+/* ---------------------- *
+ * | Zoom and pan        | *
+ * ---------------------- */
+
+/*
+ * Wire d3-zoom to the SVG: apply transforms to the inner zoom-layer group,
+ * fit the graph to the viewport on load, and bind toolbar/keyboard controls.
+ */
+function setupZoomAndPan(svg, zoomLayer) {
+  var svgNode = svg.node();
+  if (!svgNode || !zoomLayer || zoomLayer.empty()) return;
+
+  planVizZoom = d3.zoom()
+    .scaleExtent([PlanVizConstants.zoomMin, PlanVizConstants.zoomMax])
+    .filter(function (event) {
+      // Suppress pan/zoom when the user interacts with HTML labels
+      // (foreignObject) so text inside metrics tables remains selectable.
+      // Also ignore right-click to leave the browser context menu intact.
+      if (event.button === 2) return false;
+      var target = event.target;
+      while (target && target !== svgNode) {
+        if (target.nodeName === "foreignObject") return false;
+        target = target.parentNode;
+      }
+      return true;
+    })
+    .on("start", function () { svg.classed("grabbing", true); })
+    .on("zoom", function (event) {
+      zoomLayer.attr("transform", event.transform);
+      updateZoomLevelLabel(event.transform.k);
+    })
+    .on("end", function () { svg.classed("grabbing", false); });
+
+  svg.call(planVizZoom);
+
+  // Initialize the toolbar label; the SVG's viewBox + xMidYMid meet already
+  // provides the natural fit, and the zoom-layer has no transform, so no
+  // explicit transform is required here.
+  updateZoomLevelLabel(1);
+}
+
+function updateZoomLevelLabel(scale) {
+  var el = document.getElementById("plan-viz-zoom-level");
+  if (el) el.textContent = Math.round(scale * 100) + "%";
+}
+
+function planVizZoomBy(factor) {
+  var svg = planVizContainer().select("svg");
+  if (!svg.empty() && planVizZoom) {
+    svg.transition().duration(150).call(planVizZoom.scaleBy, factor);
+  }
+}
+
+function planVizZoomReset() {
+  var svg = planVizContainer().select("svg");
+  if (!svg.empty() && planVizZoom) {
+    svg.transition().duration(150).call(planVizZoom.transform, d3.zoomIdentity);
+  }
+}
+
+/* ---------------------- *
+ * | Node search         | *
+ * ---------------------- */
+
+/*
+ * Wire the search toolbar (toggle, input, navigation, close) and the global
+ * `/` shortcut. Idempotent: subsequent calls do nothing.
+ */
+function setupPlanVizSearch() {
+  var toggle = document.getElementById("plan-viz-search-toggle");
+  var input = document.getElementById("plan-viz-search-input");
+  var prevBtn = document.getElementById("plan-viz-search-prev");
+  var nextBtn = document.getElementById("plan-viz-search-next");
+  var closeBtn = document.getElementById("plan-viz-search-close");
+  if (!toggle || !input || !prevBtn || !nextBtn || !closeBtn) return;
+  if (toggle.dataset.searchWired === "true") return;
+  toggle.dataset.searchWired = "true";
+
+  toggle.addEventListener("click", function () {
+    expandPlanVizSearch();
+  });
+  closeBtn.addEventListener("click", function () {
+    collapsePlanVizSearch();
+  });
+
+  var debounceTimer = null;
+  input.addEventListener("input", function () {
+    if (debounceTimer) clearTimeout(debounceTimer);
+    debounceTimer = setTimeout(function () {
+      runPlanVizSearch(input.value, true);
+    }, 80);
+  });
+
+  input.addEventListener("keydown", function (event) {
+    if (event.key === "Enter") {
+      event.preventDefault();
+      planVizSearchGoTo(event.shiftKey ? -1 : 1);
+    } else if (event.key === "Escape") {
+      event.preventDefault();
+      collapsePlanVizSearch();
+    }
+  });
+
+  prevBtn.addEventListener("click", function () { planVizSearchGoTo(-1); });
+  nextBtn.addEventListener("click", function () { planVizSearchGoTo(1); });
+
+  document.addEventListener("keydown", function (event) {
+    if (event.ctrlKey || event.metaKey || event.altKey) return;
+    if (event.key !== "/") return;
+    var tag = event.target && event.target.tagName;
+    if (tag === "INPUT" || tag === "TEXTAREA" || event.target.isContentEditable) {
+      return;
+    }
+    var graphEl = document.getElementById("plan-viz-graph");
+    if (!graphEl || !graphEl.matches(":hover")) return;
+    event.preventDefault();
+    expandPlanVizSearch();
+  });
+}
+
+function expandPlanVizSearch() {
+  var collapsed = document.getElementById("plan-viz-search-collapsed");
+  var expanded = document.getElementById("plan-viz-search-expanded");
+  var input = document.getElementById("plan-viz-search-input");
+  if (!collapsed || !expanded || !input) return;
+  collapsed.classList.add("d-none");
+  expanded.classList.remove("d-none");
+  input.value = planVizSearchState.query;
+  input.focus();
+  input.select();
+}
+
+function collapsePlanVizSearch() {
+  var collapsed = document.getElementById("plan-viz-search-collapsed");
+  var expanded = document.getElementById("plan-viz-search-expanded");
+  var input = document.getElementById("plan-viz-search-input");
+  clearPlanVizSearchHighlights();
+  planVizSearchState.query = "";
+  planVizSearchState.matches = [];
+  planVizSearchState.index = -1;
+  updatePlanVizSearchCount();
+  if (input) input.value = "";
+  if (collapsed) collapsed.classList.remove("d-none");
+  if (expanded) expanded.classList.add("d-none");
+}
+
+/*
+ * Recompute matches against the current query, update DOM classes, and zoom
+ * to the first match. When `zoomToFirst` is false (e.g. re-applying after a
+ * detailed-mode rerender), the viewport is left untouched.
+ */
+function runPlanVizSearch(rawQuery, zoomToFirst) {
+  var query = (rawQuery || "").trim();
+  planVizSearchState.query = query;
+  planVizSearchState.matches = [];
+  planVizSearchState.index = -1;
+
+  clearPlanVizSearchHighlights();
+
+  if (query === "" || !currentPlanGraph) {
+    updatePlanVizSearchCount();
+    return;
+  }
+
+  var lower = query.toLowerCase();
+  var nodeDetails = getNodeDetails();
+  var matchedDomIds = Object.create(null);
+  var ancestorsOfMatch = Object.create(null);
+
+  currentPlanGraph.nodes().forEach(function (v) {
+    var node = currentPlanGraph.node(v);
+    if (!node) return;
+    var domId = node.id || (node.isCluster ? v : "node" + v);
+    var displayName;
+    if (nodeDetails[domId] && nodeDetails[domId].name) {
+      displayName = String(nodeDetails[domId].name);
+    } else {
+      displayName = String(node.label || "");
+    }
+    if (displayName.toLowerCase().indexOf(lower) >= 0) {
+      matchedDomIds[domId] = true;
+      // Walk up the compound-graph hierarchy so we don't dim a cluster that
+      // contains a match (which would visually hide the matched child).
+      var parent = currentPlanGraph.parent(v);
+      while (parent) {
+        var parentNode = currentPlanGraph.node(parent);
+        var parentDomId = (parentNode && parentNode.id) || parent;
+        ancestorsOfMatch[parentDomId] = true;
+        parent = currentPlanGraph.parent(parent);
+      }
+    }
+  });
+
+  var svg = planVizContainer().select("svg");
+  var nodeEls = svg.selectAll("g.node, g.cluster").nodes();
+  var orderedMatches = [];
+  var anyMatch = false;
+  Object.keys(matchedDomIds).forEach(function () { anyMatch = true; });
+  nodeEls.forEach(function (el) {
+    if (matchedDomIds[el.id]) {
+      orderedMatches.push(el.id);
+      var rect = el.querySelector(":scope > rect");
+      if (rect) rect.classList.add("search-match");
+      if (el.classList.contains("cluster")) el.classList.add("search-match");
+    } else if (anyMatch && !ancestorsOfMatch[el.id]) {
+      // Only dim when at least one match exists; otherwise leave the plan
+      // fully visible so the user can adjust their query without obscuring
+      // context (matches familiar find-in-page UX).
+      el.classList.add("search-dimmed");
+    }
+  });
+
+  planVizSearchState.matches = orderedMatches;
+  if (orderedMatches.length > 0) {
+    planVizSearchState.index = 0;
+    markActiveMatch(orderedMatches[0]);
+    if (zoomToFirst) zoomToNode(orderedMatches[0]);
+  }
+  updatePlanVizSearchCount();
+}
+
+function planVizSearchGoTo(delta) {
+  var matches = planVizSearchState.matches;
+  if (matches.length === 0) return;
+  var idx = (planVizSearchState.index + delta + matches.length) % matches.length;
+  planVizSearchState.index = idx;
+  markActiveMatch(matches[idx]);
+  zoomToNode(matches[idx]);
+  updatePlanVizSearchCount();
+}
+
+function markActiveMatch(domId) {
+  planVizContainer().selectAll("rect.search-active")
+    .classed("search-active", false);
+  if (!domId) return;
+  var el = document.getElementById(domId);
+  if (!el) return;
+  var rect = el.querySelector(":scope > rect");
+  if (rect) rect.classList.add("search-active");
+}
+
+function clearPlanVizSearchHighlights() {
+  var container = planVizContainer();
+  container.selectAll(".search-match").classed("search-match", false);
+  container.selectAll(".search-active").classed("search-active", false);
+  container.selectAll(".search-dimmed").classed("search-dimmed", false);
+}
+
+function updatePlanVizSearchCount() {
+  var el = document.getElementById("plan-viz-search-count");
+  if (!el) return;
+  var matches = planVizSearchState.matches;
+  if (planVizSearchState.query === "") {
+    el.textContent = "";
+    el.classList.remove("no-match");
+  } else if (matches.length === 0) {
+    el.textContent = "0/0";
+    el.classList.add("no-match");
+  } else {
+    el.textContent = (planVizSearchState.index + 1) + "/" + matches.length;
+    el.classList.remove("no-match");
+  }
+}
+
+/* Center and scale the viewport on the given DOM element using d3-zoom. */
+function zoomToNode(domId) {
+  var el = document.getElementById(domId);
+  if (!el || !planVizZoom) return;
+  var svg = planVizContainer().select("svg");
+  var svgNode = svg.node();
+  if (!svgNode) return;
+  var vb = svgNode.viewBox && svgNode.viewBox.baseVal;
+  if (!vb || vb.width === 0 || vb.height === 0) return;
+
+  var bbox;
+  try {
+    bbox = el.getBBox();
+  } catch (e) {
+    return;
+  }
+  if (!bbox || bbox.width === 0 || bbox.height === 0) return;
+
+  // Aim to fill ~50% of the viewport so the matched node is prominent but
+  // still in context. Clamp to the configured zoom range.
+  var scale = Math.min(
+    vb.width / bbox.width / 2,
+    vb.height / bbox.height / 2,
+    PlanVizConstants.zoomMax
+  );
+  scale = Math.max(scale, PlanVizConstants.zoomMin);
+
+  var bcx = bbox.x + bbox.width / 2;
+  var bcy = bbox.y + bbox.height / 2;
+  var vcx = vb.x + vb.width / 2;
+  var vcy = vb.y + vb.height / 2;
+  var transform = d3.zoomIdentity
+    .translate(vcx - bcx * scale, vcy - bcy * scale)
+    .scale(scale);
+
+  svg.transition().duration(400).call(planVizZoom.transform, transform);
 }
+
+/*
+ * After a render or detailed-mode toggle, reapply the active query against the
+ * fresh DOM so highlights survive re-layout. No-ops when no search is active.
+ */
+function reapplyPlanVizSearch() {
+  if (!planVizSearchState.query) return;
+  runPlanVizSearch(planVizSearchState.query, false);
+}
+
 document.addEventListener("DOMContentLoaded", function () {
   if (shouldRenderPlanViz()) {
     renderPlanViz();
   }
 
+  var zoomInBtn = document.getElementById("plan-viz-zoom-in");
+  if (zoomInBtn) {
+    zoomInBtn.addEventListener("click", function () {
+      planVizZoomBy(PlanVizConstants.zoomStep);
+    });
+  }
+  var zoomOutBtn = document.getElementById("plan-viz-zoom-out");
+  if (zoomOutBtn) {
+    zoomOutBtn.addEventListener("click", function () {
+      planVizZoomBy(1 / PlanVizConstants.zoomStep);
+    });
+  }
+  var zoomResetBtn = document.getElementById("plan-viz-zoom-reset");
+  if (zoomResetBtn) {
+    zoomResetBtn.addEventListener("click", planVizZoomReset);
+  }
+
+  setupPlanVizSearch();
+
+  // Keyboard shortcuts when the SVG is focused or the user hovers over it.
+  document.addEventListener("keydown", function (event) {
+    if (event.ctrlKey || event.metaKey || event.altKey) return;
+    var tag = event.target && event.target.tagName;
+    if (tag === "INPUT" || tag === "TEXTAREA" || event.target.isContentEditable) return;
+    var graphEl = document.getElementById("plan-viz-graph");
+    if (!graphEl || !graphEl.matches(":hover")) return;
+    if (event.key === "+" || event.key === "=") {
+      planVizZoomBy(PlanVizConstants.zoomStep);
+      event.preventDefault();
+    } else if (event.key === "-" || event.key === "_") {
+      planVizZoomBy(1 / PlanVizConstants.zoomStep);
+      event.preventDefault();
+    } else if (event.key === "0") {
+      planVizZoomReset();
+      event.preventDefault();
+    }
+  });
+
   // Copy physical plan text to clipboard
   var copyPlanBtn = document.getElementById("copy-plan-btn");
   if (copyPlanBtn) {
diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/sql-table-utils.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/sql-table-utils.js
new file mode 100644
index 0000000000000..7b0f4ffccde58
--- /dev/null
+++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/sql-table-utils.js
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* global $, uiRoot, appBasePath */
+/* eslint-disable no-unused-vars */
+
+function formatDurationSql(milliseconds) {
+  if (milliseconds < 100) return parseInt(milliseconds).toFixed(1) + " ms";
+  var seconds = milliseconds / 1000;
+  if (seconds < 1) return seconds.toFixed(1) + " s";
+  if (seconds < 60) return seconds.toFixed(0) + " s";
+  var minutes = seconds / 60;
+  if (minutes < 10) return minutes.toFixed(1) + " min";
+  if (minutes < 60) return minutes.toFixed(0) + " min";
+  var hours = minutes / 60;
+  return hours.toFixed(1) + " h";
+}
+
+function formatDateSql(dateStr) {
+  if (!dateStr) return "";
+  try {
+    var dt = new Date(dateStr.replace("GMT", "Z"));
+    if (isNaN(dt.getTime())) return dateStr;
+    var pad = function (n) { return n < 10 ? "0" + n : n; };
+    return dt.getFullYear() + "-" + pad(dt.getMonth() + 1) + "-" + pad(dt.getDate()) + " " +
+      pad(dt.getHours()) + ":" + pad(dt.getMinutes()) + ":" + pad(dt.getSeconds());
+  } catch (e) { return dateStr; }
+}
+
+function escapeHtml(str) {
+  if (!str) return str;
+  var div = document.createElement("div");
+  div.appendChild(document.createTextNode(str));
+  return div.innerHTML;
+}
+
+function statusBadge(status) {
+  var cls = "bg-secondary";
+  if (status === "COMPLETED") cls = "bg-success";
+  else if (status === "RUNNING") cls = "bg-primary";
+  else if (status === "FAILED") cls = "bg-danger";
+  return '<span class="badge ' + cls + '">' + status + '</span>';
+}
+
+function jobIdLinks(ids) {
+  if (!ids || ids.length === 0) return "";
+  var basePath = uiRoot + appBasePath;
+  return ids.map(function (id) {
+    return '<a href="' + basePath + '/jobs/job/?id=' + id + '">' + id + '</a>';
+  }).join(", ");
+}
+
+// Render a description cell.
+//   exec   - {id, description}
+//   opts.detail - if true: render full text in <pre class="sql-cell-pre">,
+//                 no truncation, no self-link. Used by the SQL detail page.
+//                 Multi-line / long descriptions are wrapped in <details>
+//                 so the cell starts collapsed with a one-line summary.
+//                 if false (default): truncate to 100 chars, render as a
+//                 link to the execution detail page. Used by the list page
+//                 and by sub-execution child rows.
+function descriptionHtml(exec, opts) {
+  opts = opts || {};
+  var desc = exec.description || "";
+  if (opts.detail) {
+    if (!desc) {
+      return '<span class="text-muted">(no description)</span>';
+    }
+    return collapsiblePre(desc);
+  }
+  var basePath = uiRoot + appBasePath;
+  var url = basePath + "/SQL/execution/?id=" + exec.id;
+  if (desc.length > 100) {
+    var short = escapeHtml(desc.substring(0, 100)) + "...";
+    return '<a href="' + url + '" title="' + escapeHtml(desc) + '">' +
+      short + '</a>';
+  }
+  return '<a href="' + url + '">' + (escapeHtml(desc) || exec.id) + '</a>';
+}
+
+// Render a long, possibly multi-line value as either a plain <pre> when it
+// fits one short line, or a <details>/<summary>/<pre> disclosure block
+// otherwise. The summary shows the first 100 characters of the first line so
+// the cell stays compact when collapsed.
+function collapsiblePre(text) {
+  var firstLineBreak = text.indexOf("\n");
+  var firstLine = firstLineBreak >= 0 ? text.substring(0, firstLineBreak) : text;
+  var multiLine = firstLineBreak >= 0;
+  if (!multiLine && text.length <= 100) {
+    return '<pre class="sql-cell-pre">' + escapeHtml(text) + '</pre>';
+  }
+  var summary = firstLine.length > 100 ?
+    firstLine.substring(0, 100) + "..." :
+    firstLine + (multiLine ? "  ..." : "");
+  return '<details class="sql-cell-details">' +
+    '<summary>' + escapeHtml(summary) + '</summary>' +
+    '<pre class="sql-cell-pre">' + escapeHtml(text) + '</pre>' +
+    '</details>';
+}
+
+// Resolve the Spark applicationId for the current page, then invoke
+// callback(appId). Checks /proxy/<id> and /history/<id> path prefixes first;
+// falls back to the REST list endpoint for the local-mode UI.
+function withResolvedAppId(callback) {
+  var words = document.baseURI.split("/");
+  var appId = "";
+  var ind = words.indexOf("proxy");
+  if (ind > 0) {
+    appId = words[ind + 1];
+  } else {
+    ind = words.indexOf("history");
+    if (ind > 0) {
+      appId = words[ind + 1];
+    }
+  }
+  if (appId) {
+    callback(appId);
+    return;
+  }
+  $.getJSON(uiRoot + "/api/v1/applications", function (response) {
+    if (response && response.length > 0) {
+      callback(response[0].id);
+    }
+  });
+}
+
+// Build the base URL for SQL REST endpoints, accounting for proxy/history paths.
+// Returns "<baseURI>/api/v1/applications/<resolvedAppId>/sql"; the caller can
+// append "/sqlTable", "/<id>", etc.
+function createSqlApiBase(appId) {
+  var words = document.baseURI.split("/");
+  var ind = words.indexOf("proxy");
+  var newBaseURI;
+  if (ind > 0) {
+    appId = words[ind + 1];
+    newBaseURI = words.slice(0, ind + 2).join("/");
+    return newBaseURI + "/api/v1/applications/" + appId + "/sql";
+  }
+  ind = words.indexOf("history");
+  if (ind > 0) {
+    appId = words[ind + 1];
+    var attemptId = words[ind + 2];
+    newBaseURI = words.slice(0, ind).join("/");
+    if (isNaN(attemptId)) {
+      return newBaseURI + "/api/v1/applications/" + appId + "/sql";
+    } else {
+      return newBaseURI + "/api/v1/applications/" + appId + "/" +
+        attemptId + "/sql";
+    }
+  }
+  return uiRoot + "/api/v1/applications/" + appId + "/sql";
+}
+
+// Factory for the shared SQL DataTable column definitions used on both the
+// SQL listing page (`allexecutionspage.js`) and the SQL execution detail page
+// (`executionpage.js`).
+//   opts.detail - true on the detail page (single-row table): no truncation,
+//                 ID and Description rendered as plain text (no self-link),
+//                 description rendered as <pre> so SQL formatting is kept.
+//                 false on the list page (default): truncate long values,
+//                 ID and Description link to the detail page.
+function getSqlTableColumns(opts) {
+  opts = opts || {};
+  var detail = opts.detail === true;
+
+  var idColumn = {
+    data: "id", name: "id", title: "ID",
+    render: function (data, type) {
+      if (type !== "display") return data;
+      if (detail) return data;
+      var basePath = uiRoot + appBasePath;
+      return '<a href="' + basePath + '/SQL/execution/?id=' + data + '">' +
+        data + '</a>';
+    }
+  };
+
+  var queryIdColumn = {
+    data: "queryId", name: "queryId", title: "Query ID",
+    orderable: false,
+    render: function (data, type) {
+      if (type !== "display" || !data) return data || "";
+      var safe = escapeHtml(data);
+      if (detail) return safe;
+      return '<span title="' + safe + '">' +
+        escapeHtml(data.substring(0, 8)) + '...</span>';
+    }
+  };
+
+  var statusColumn = {
+    data: "status", name: "status", title: "Status",
+    render: function (data, type) {
+      if (type !== "display") return data;
+      return statusBadge(data);
+    }
+  };
+
+  var descriptionColumn = {
+    data: "description", name: "description", title: "Description",
+    render: function (data, type, row) {
+      if (type !== "display") return data || "";
+      return descriptionHtml({ id: row.id, description: data }, { detail: detail });
+    }
+  };
+
+  var submissionColumn = {
+    data: "submissionTime", name: "submissionTime", title: "Submitted",
+    render: function (data, type) {
+      if (type !== "display") return data;
+      return formatDateSql(data);
+    }
+  };
+
+  var durationColumn = {
+    data: "duration", name: "duration", title: "Duration",
+    render: function (data, type) {
+      if (type !== "display") return data;
+      return formatDurationSql(data);
+    }
+  };
+
+  var jobsColumn = {
+    data: "jobIds", name: "jobIds", title: "Succeeded Jobs",
+    orderable: false,
+    render: function (data, type) {
+      if (type !== "display") return (data || []).join(",");
+      return jobIdLinks(data || []);
+    }
+  };
+
+  var errorColumn = {
+    data: "errorMessage", name: "errorMessage", title: "Error Message",
+    orderable: false,
+    render: function (data, type) {
+      if (type !== "display" || !data) return data || "";
+      if (detail) {
+        return collapsiblePre(data);
+      }
+      if (data.length > 100) {
+        return '<span title="' + escapeHtml(data) + '">' +
+          escapeHtml(data.substring(0, 100)) + '...</span>';
+      }
+      return escapeHtml(data);
+    }
+  };
+
+  return [idColumn, queryIdColumn, statusColumn, descriptionColumn,
+    submissionColumn, durationColumn, jobsColumn, errorColumn];
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMetricView.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMetricView.scala
index a7763c2799a6e..760a3d09f467b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMetricView.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveMetricView.scala
@@ -23,13 +23,11 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Measure}
-import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreePattern.METRIC_VIEW_PLACEHOLDER
-import org.apache.spark.sql.metricview.logical.{MetricViewPlaceholder, ResolvedMetricView}
-import org.apache.spark.sql.metricview.serde.{Column => CanonicalColumn, DimensionExpression, JsonUtils, MeasureExpression, MetricView => CanonicalMetricView}
-import org.apache.spark.sql.types.{DataType, Metadata, MetadataBuilder}
+import org.apache.spark.sql.metricview.logical.{DimensionInputColumn, InputColumn, MeasureInputColumn, MetricViewPlaceholder, ResolvedMetricView}
+import org.apache.spark.sql.types.DataType
 
 /**
  * Analysis rule for resolving metric view operations (CREATE and SELECT).
@@ -165,7 +163,6 @@ import org.apache.spark.sql.types.{DataType, Metadata, MetadataBuilder}
  * into resolved logical plans that can be further optimized and executed.
  */
 case class ResolveMetricView(session: SparkSession) extends Rule[LogicalPlan] {
-  private def parser: ParserInterface = session.sessionState.sqlParser
   override def apply(plan: LogicalPlan): LogicalPlan = {
     if (!plan.containsPattern(METRIC_VIEW_PLACEHOLDER)) {
       return plan
@@ -176,7 +173,7 @@ case class ResolveMetricView(session: SparkSession) extends Rule[LogicalPlan] {
       // are aggregate functions, we need to use an Aggregate node and group by all
       // dimensions to get the output schema.
       case mvp: MetricViewPlaceholder if mvp.isCreate && mvp.child.resolved =>
-        val (dimensions, measures) = buildMetricViewOutput(mvp.desc)
+        val (dimensions, measures) = buildMetricViewOutput(mvp.inputColumns)
         Aggregate(
           // group by all dimensions
           dimensions.map(_.toAttribute).toSeq,
@@ -190,9 +187,11 @@ case class ResolveMetricView(session: SparkSession) extends Rule[LogicalPlan] {
       // Resolve the Aggregate node based on the metric view output and then replace
       // the AttributeReference of the metric view output to the actual expressions.
       case node @ MetricViewReadOperation(metricView) =>
-        // step 1: parse the metric view definition
+        // step 1: build typed dimension / measure columns from the placeholder's
+        // pre-parsed `inputColumns`. These were populated by `MetricViewPlanner` from the
+        // YAML descriptor so the resolver doesn't need to re-parse expressions here.
         val (dimensions, measures) =
-          parseMetricViewColumns(metricView.outputMetrics, metricView.desc.select)
+          buildMetricViewColumns(metricView.outputMetrics, metricView.inputColumns)
 
         // step 2: build the Project node containing the dimensions
         val dimensionExprs = dimensions.map(_.namedExpr)
@@ -235,50 +234,40 @@ case class ResolveMetricView(session: SparkSession) extends Rule[LogicalPlan] {
     }
   }
 
-  private def buildMetricViewOutput(metricView: CanonicalMetricView)
+  /**
+   * Builds the named expressions used by the CREATE-time aggregate (so the analyzer can
+   * derive the output schema). Reads pre-parsed expressions from [[InputColumn]]s.
+   */
+  private def buildMetricViewOutput(inputColumns: Seq[InputColumn])
   : (Seq[NamedExpression], Seq[NamedExpression]) = {
     val dimensions = new mutable.ArrayBuffer[NamedExpression]()
     val measures = new mutable.ArrayBuffer[NamedExpression]()
-    metricView.select.foreach { col =>
-      val metadata = new MetadataBuilder()
-        .withMetadata(Metadata.fromJson(JsonUtils.toJson(col.getColumnMetadata)))
-        .build()
-      col.expression match {
-        case DimensionExpression(expr) =>
-          dimensions.append(
-            Alias(parser.parseExpression(expr), col.name)(explicitMetadata = Some(metadata)))
-        case MeasureExpression(expr) =>
-          measures.append(
-            Alias(parser.parseExpression(expr), col.name)(explicitMetadata = Some(metadata)))
-      }
+    inputColumns.foreach {
+      case c: DimensionInputColumn =>
+        dimensions.append(Alias(c.expr, c.name)(explicitMetadata = Some(c.metadata)))
+      case c: MeasureInputColumn =>
+        measures.append(Alias(c.expr, c.name)(explicitMetadata = Some(c.metadata)))
     }
     (dimensions.toSeq, measures.toSeq)
   }
 
-  private def parseMetricViewColumns(
+  /**
+   * Pairs each pre-parsed [[InputColumn]] with the matching output attribute (by position)
+   * so the resolver can replace `MEASURE(<measure-name>)` references with the original
+   * aggregate expression while preserving exprId and data type.
+   */
+  private def buildMetricViewColumns(
       metricViewOutput: Seq[Attribute],
-      columns: Seq[CanonicalColumn]
+      inputColumns: Seq[InputColumn]
   ): (Seq[MetricViewDimension], Seq[MetricViewMeasure]) = {
     val dimensions = new mutable.ArrayBuffer[MetricViewDimension]()
     val measures = new mutable.ArrayBuffer[MetricViewMeasure]()
-    metricViewOutput.zip(columns).foreach { case (attr, column) =>
-      column.expression match {
-        case DimensionExpression(expr) =>
-          dimensions.append(
-            MetricViewDimension(
-              attr.name,
-              parser.parseExpression(expr),
-              attr.exprId,
-              attr.dataType)
-          )
-        case MeasureExpression(expr) =>
-          measures.append(
-            MetricViewMeasure(
-              attr.name,
-              parser.parseExpression(expr),
-              attr.exprId,
-              attr.dataType)
-          )
+    metricViewOutput.zip(inputColumns).foreach { case (attr, column) =>
+      column match {
+        case c: DimensionInputColumn =>
+          dimensions.append(MetricViewDimension(attr.name, c.expr, attr.exprId, attr.dataType))
+        case c: MeasureInputColumn =>
+          measures.append(MetricViewMeasure(attr.name, c.expr, attr.exprId, attr.dataType))
       }
     }
     (dimensions.toSeq, measures.toSeq)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index e2bfaef1e7002..cfd52707bbc2c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, CharVarcharUtils, ResolveDefaultColumns => DefaultCols}
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
-import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table}
+import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table, ViewCatalog}
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.execution.command._
@@ -36,6 +36,7 @@ import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1,
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
 import org.apache.spark.sql.internal.connector.V1Function
+import org.apache.spark.sql.metricview.logical.CreateMetricView
 import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType, StructField, StructType}
 import org.apache.spark.util.SparkStringUtils
 
@@ -208,11 +209,25 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
         output) =>
       DescribeTableCommand(resolvedChild, ident, spec, isExtended, output)
 
-    case DescribeColumn(
-        ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended, output) =>
-      // For views, the column will not be resolved by `ResolveReferences` because
-      // `ResolvedView` stores only the identifier.
-      DescribeColumnCommand(ident, column.nameParts, isExtended, output)
+    // `DESCRIBE TABLE <view> PARTITION (...)` against a non-session v2 view: the v1 rewrite
+    // above is gated on `ResolvedV1TableOrViewIdentifier` (session-only), so non-session v2
+    // views fall through. Reject early with the same `FORBIDDEN_OPERATION` v1 raises at
+    // runtime in `DescribeTableCommand.describeDetailedPartitionInfo`. Without this rewrite,
+    // CheckAnalysis surfaces a generic "Found the unresolved operator" INTERNAL_ERROR
+    // because `UnresolvedPartitionSpec` is never resolved on the v2 view path.
+    case DescribeTablePartition(rpv: ResolvedPersistentView, _, _, _) =>
+      val quoted = (rpv.catalog.name() +: rpv.identifier.asMultipartIdentifier)
+        .map(quoteIfNeeded).mkString(".")
+      throw QueryCompilationErrors.descPartitionNotAllowedOnView(quoted)
+
+    case DescribeColumn(ResolvedViewIdentifier(ident), column, isExtended, output) =>
+      // `ResolvedPersistentView` exposes the view's schema as its `output`, so `ResolveReferences`
+      // typically resolves the column to an `Attribute` here. We also accept the legacy
+      // `UnresolvedAttribute` form (e.g. the parser referenced a non-existent column whose
+      // resolution was skipped) so the rewrite stays robust across analyzer ordering changes.
+      // The unwrap logic is shared with the non-session v2 view path in `DataSourceV2Strategy`.
+      val nameParts = DescribeColumn.extractColumnNameParts(column)
+      DescribeColumnCommand(ident, nameParts, isExtended, output)
 
     case DescribeColumn(ResolvedV1TableIdentifier(ident), column, isExtended, output) =>
       column match {
@@ -327,11 +342,17 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
     case DropView(DropViewInSessionCatalog(ident), ifExists) =>
       DropTableCommand(ident, ifExists, isView = true, purge = false)
 
-    case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) =>
+    // ViewCatalog catalogs fall through to `DataSourceV2Strategy`, which routes DROP VIEW to
+    // `ViewCatalog.dropView` (this also covers METRIC_VIEW since metric views are persisted
+    // through the same ViewCatalog interface). Other non-session catalogs get
+    // `MISSING_CATALOG_ABILITY.VIEWS`, matching the error raised from `CheckViewReferences` for
+    // CREATE/ALTER VIEW and from the analyzer gate on UnresolvedView.
+    case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists)
+        if !catalog.isInstanceOf[ViewCatalog] =>
       if (catalog == FakeSystemCatalog) {
         DropTempViewCommand(ident, ifExists)
       } else {
-        throw QueryCompilationErrors.catalogOperationNotSupported(catalog, "views")
+        throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog)
       }
 
     case c @ CreateNamespace(DatabaseNameInSessionCatalog(name), _, _) if conf.useV1Command =>
@@ -517,14 +538,21 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
         location) =>
       AlterTableSetLocationCommand(ident, Some(partitionSpec), location)
 
-    case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query) =>
+    // The final `_, _` are AlterViewAs.isAnalyzed and referredTempFunctions. We drop both:
+    // AlterViewAsCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass
+    // from HandleSpecialCommand after this rewrite.
+    case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query, _, _) =>
       AlterViewAsCommand(ident, originalText, query)
 
     case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode) =>
       AlterViewSchemaBindingCommand(ident, viewSchemaMode)
 
+    // The final `_, _` are CreateView.isAnalyzed and referredTempFunctions. We drop both:
+    // CreateViewCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass
+    // from HandleSpecialCommand after this rewrite.
     case CreateView(CreateViewInSessionCatalog(ident), userSpecifiedColumns, comment,
-        collation, properties, originalText, child, allowExisting, replace, viewSchemaMode) =>
+        collation, properties, originalText, query, allowExisting, replace, viewSchemaMode,
+        _, _) =>
       CreateViewCommand(
         name = ident,
         userSpecifiedColumns = userSpecifiedColumns,
@@ -532,16 +560,31 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
         collation = collation,
         properties = properties,
         originalText = originalText,
-        plan = child,
+        plan = query,
         allowExisting = allowExisting,
         replace = replace,
         viewType = PersistedView,
         viewSchemaMode = viewSchemaMode)
 
-    case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _, _) =>
-      throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog)
-
-    case ShowViews(ns: ResolvedNamespace, pattern, output) =>
+    // CREATE VIEW ... WITH METRICS on the session catalog -> V1 runnable command. Non-session
+    // v2 catalogs leave [[CreateMetricView]] in place for `DataSourceV2Strategy` to dispatch
+    // to `CreateV2MetricViewExec`.
+    case cm @ CreateMetricView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _)
+        if isSessionCatalog(catalog) =>
+      CreateMetricViewCommand(
+        cm.child,
+        cm.userSpecifiedColumns,
+        cm.comment,
+        cm.properties,
+        cm.originalText,
+        cm.allowExisting,
+        cm.replace)
+
+    // ViewCatalog catalogs are handled by the v2 strategy (enumerates via listViews); we skip
+    // the match here so the plan flows through unchanged. Only non-session, non-ViewCatalog
+    // catalogs hit the MISSING_CATALOG_ABILITY.VIEWS rejection.
+    case ShowViews(ns: ResolvedNamespace, pattern, output)
+        if !ns.catalog.isInstanceOf[ViewCatalog] =>
       ns match {
         case ResolvedDatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern, output)
         case _ =>
@@ -772,9 +815,14 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
   }
 
   object ResolvedViewIdentifier {
+    // Only matches session-catalog persistent views. Non-session-catalog persistent views
+    // (produced for `MetadataTable`) fall through and are picked up by dedicated v2 strategy
+    // cases in `DataSourceV2Strategy` -- AlterViewAs, SET/UNSET TBLPROPERTIES, ALTER VIEW ...
+    // WITH SCHEMA, RENAME TO, SHOW CREATE TABLE, SHOW TBLPROPERTIES, SHOW COLUMNS, DESCRIBE
+    // [COLUMN] all dispatch to v2 view execs that consume `ResolvedPersistentView.info`
+    // directly.
     def unapply(resolved: LogicalPlan): Option[TableIdentifier] = resolved match {
-      case ResolvedPersistentView(catalog, ident, _) =>
-        assert(isSessionCatalog(catalog))
+      case ResolvedPersistentView(catalog, ident, _) if isSessionCatalog(catalog) =>
         Some(ident.asTableIdentifier.copy(catalog = Some(catalog.name)))
 
       case ResolvedTempView(ident, _) =>
@@ -938,4 +986,5 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
       SQLConf.get.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) == "builtin" ||
         catalog.isInstanceOf[CatalogExtension])
   }
+
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/Catalog.scala
index 35041feca9e18..40c40f6ea78aa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/Catalog.scala
@@ -272,12 +272,12 @@ class Catalog(sparkSession: SparkSession) extends catalog.Catalog with Logging {
             if (isExternal) CatalogTableType.EXTERNAL.name
             else CatalogTableType.MANAGED.name,
           isTemporary = false)
-      case ResolvedPersistentView(catalog, identifier, metadata) =>
+      case ResolvedPersistentView(catalog, identifier, info) =>
         new Table(
           name = identifier.name(),
           catalog = catalog.name(),
           namespace = identifier.namespace(),
-          description = metadata.comment.orNull,
+          description = info.properties().get(TableCatalog.PROP_COMMENT),
           tableType = "VIEW",
           isTemporary = false
         )
@@ -409,10 +409,12 @@ class Catalog(sparkSession: SparkSession) extends catalog.Catalog with Logging {
         val catalogPath =
           (Seq(currentCatalog()) ++
             sparkSession.sessionState.catalogManager.currentNamespace).toSeq
-        val searchPath = sparkSession.sessionState.conf.resolutionSearchPath(catalogPath)
+        val searchPath = sparkSession.sessionState.catalogManager
+          .sqlResolutionPathEntries(catalogPath.head, catalogPath.tail.toSeq)
+          .map(_.quoted)
         throw QueryCompilationErrors.unresolvedRoutineError(
           ident,
-          searchPath.map(_.quoted),
+          searchPath,
           plan.origin)
     }
   }
@@ -459,8 +461,8 @@ class Catalog(sparkSession: SparkSession) extends catalog.Catalog with Logging {
         schemaToColumns(schema, partitionColumnNames.contains, bucketColumnNames.contains,
           clusteringColumnNames.contains)
 
-      case ResolvedPersistentView(_, _, metadata) =>
-        schemaToColumns(metadata.schema)
+      case ResolvedPersistentView(_, _, info) =>
+        schemaToColumns(info.schema)
 
       case ResolvedTempView(_, metadata) =>
         schemaToColumns(metadata.schema)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameReader.scala
index d0d6bf1e8ec0d..3dbdf05305164 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameReader.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql
 import org.apache.spark.sql.Encoders
 import org.apache.spark.sql.catalyst.DataSourceOptions
 import org.apache.spark.sql.catalyst.analysis.{RelationChanges, UnresolvedRelation}
-import org.apache.spark.sql.catalyst.analysis.ChangelogInfoUtils
+import org.apache.spark.sql.catalyst.analysis.ChangelogContextUtils
 import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityParser}
 import org.apache.spark.sql.catalyst.expressions.ExprUtils
 import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}
@@ -328,10 +328,10 @@ class DataFrameReader private[sql](sparkSession: SparkSession)
     val multipartIdentifier =
       sparkSession.sessionState.sqlParser.parseMultipartIdentifier(tableName)
     val options = new CaseInsensitiveStringMap(extraOptions.toMap.asJava)
-    val changelogInfo = ChangelogInfoUtils.fromOptions(
+    val changelogContext = ChangelogContextUtils.fromOptions(
       options, sparkSession.sessionState.conf.sessionLocalTimeZone)
     val relation = UnresolvedRelation(multipartIdentifier, options)
-    Dataset.ofRows(sparkSession, RelationChanges(relation, changelogInfo))
+    Dataset.ofRows(sparkSession, RelationChanges(relation, changelogContext))
   }
 
   /** @inheritdoc */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala
index f0359b33f431d..a9f16ffa87be1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriter.scala
@@ -192,17 +192,20 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
           val relation = DataSourceV2Relation.create(table, catalog, ident, dsOptions)
           checkPartitioningMatchesV2Table(table)
           if (curmode == SaveMode.Append) {
-            AppendData.byName(relation, df.logicalPlan, finalOptions)
+            AppendData.byName(relation, df.logicalPlan, finalOptions, _withSchemaEvolution)
           } else {
             // Truncate the table. TableCapabilityCheck will throw a nice exception if this
             // isn't supported
             OverwriteByExpression.byName(
-              relation, df.logicalPlan, Literal(true), finalOptions)
+              relation, df.logicalPlan, Literal(true), finalOptions, _withSchemaEvolution)
           }
 
         case createMode =>
           provider match {
             case supportsExtract: SupportsCatalogOptions =>
+              if (_withSchemaEvolution) {
+                throw QueryCompilationErrors.schemaEvolutionNotSupportedForCreateTableWriteError()
+              }
               val ident = supportsExtract.extractIdentifier(dsOptions)
               val catalog = CatalogV2Util.getTableProviderCatalog(
                 supportsExtract, catalogManager, dsOptions)
@@ -233,16 +236,24 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
                 // Streaming also uses the data source V2 API. So it may be that the data source
                 // implements v2, but has no v2 implementation for batch writes. In that case, we
                 // fallback to saving as though it's a V1 source.
+                assertSchemaEvolutionNotEnabledForV1Write()
                 saveToV1SourceCommand(path)
               }
           }
       }
 
     } else {
+      assertSchemaEvolutionNotEnabledForV1Write()
       saveToV1SourceCommand(path)
     }
   }
 
+  private def assertSchemaEvolutionNotEnabledForV1Write(): Unit = {
+    if (_withSchemaEvolution) {
+      throw QueryCompilationErrors.schemaEvolutionNotSupportedForV1TableWriteError()
+    }
+  }
+
   private def getOptionsWithPath(path: Option[String]): CaseInsensitiveMap[String] = {
     if (path.isEmpty) {
       extraOptions
@@ -347,7 +358,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
 
     curmode match {
       case SaveMode.Append | SaveMode.ErrorIfExists | SaveMode.Ignore =>
-        AppendData.byPosition(table, df.logicalPlan, extraOptions.toMap)
+        AppendData.byPosition(table, df.logicalPlan, extraOptions.toMap, _withSchemaEvolution)
 
       case SaveMode.Overwrite =>
         val conf = df.sparkSession.sessionState.conf
@@ -355,14 +366,17 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
           conf.partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC
 
         if (dynamicPartitionOverwrite) {
-          OverwritePartitionsDynamic.byPosition(table, df.logicalPlan, extraOptions.toMap)
+          OverwritePartitionsDynamic.byPosition(
+            table, df.logicalPlan, extraOptions.toMap, _withSchemaEvolution)
         } else {
-          OverwriteByExpression.byPosition(table, df.logicalPlan, Literal(true), extraOptions.toMap)
+          OverwriteByExpression.byPosition(
+            table, df.logicalPlan, Literal(true), extraOptions.toMap, _withSchemaEvolution)
         }
     }
   }
 
   private def insertIntoCommand(tableIdent: TableIdentifier): LogicalPlan = {
+    assertSchemaEvolutionNotEnabledForV1Write()
     InsertIntoStatement(
       table = UnresolvedRelation(tableIdent).requireWritePrivileges(getWritePrivileges),
       partitionSpec = Map.empty[String, Option[String]],
@@ -452,6 +466,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
         saveAsTableCommand(catalog.asTableCatalog, v2ProviderOpt, ident, nameParts)
 
       case AsTableIdentifier(tableIdentifier) =>
+        assertSchemaEvolutionNotEnabledForV1Write()
         saveAsV1TableCommand(tableIdentifier)
 
       case other =>
@@ -470,14 +485,18 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
 
     (curmode, tableOpt) match {
       case (_, Some(_: V1Table)) =>
+        assertSchemaEvolutionNotEnabledForV1Write()
         saveAsV1TableCommand(TableIdentifier(ident.name(), ident.namespace().headOption))
 
       case (SaveMode.Append, Some(table)) =>
         checkPartitioningMatchesV2Table(table)
         val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident))
-        AppendData.byName(v2Relation, df.logicalPlan, extraOptions.toMap)
+        AppendData.byName(v2Relation, df.logicalPlan, extraOptions.toMap, _withSchemaEvolution)
 
       case (SaveMode.Overwrite, _) =>
+        if (_withSchemaEvolution) {
+          throw QueryCompilationErrors.schemaEvolutionNotSupportedForReplaceTableWriteError()
+        }
         val tableSpec = UnresolvedTableSpec(
           properties = Map.empty,
           provider = Some(source),
@@ -504,6 +523,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) extends sql.DataFram
           orCreate = true) // Create the table if it doesn't exist
 
       case (other, _) =>
+        if (_withSchemaEvolution) {
+          throw QueryCompilationErrors.schemaEvolutionNotSupportedForCreateTableWriteError()
+        }
         // We have a potential race condition here in AppendMode, if the table suddenly gets
         // created between our existence check and physical execution, but this can't be helped
         // in any case.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriterV2.scala
index 169822db96c26..0da60e98cdbe7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriterV2.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataFrameWriterV2.scala
@@ -152,6 +152,9 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T])
   }
 
   private[sql] def createCommand(): LogicalPlan = {
+    if (_withSchemaEvolution) {
+      throw QueryCompilationErrors.schemaEvolutionNotSupportedForCreateTableWriteError()
+    }
     CreateTableAsSelect(
       UnresolvedIdentifier(tableName),
       partitioning.getOrElse(Seq.empty) ++ clustering,
@@ -195,7 +198,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T])
   private[sql] def appendCommand(): LogicalPlan = {
     AppendData.byName(
       UnresolvedRelation(tableName).requireWritePrivileges(Set(INSERT)),
-      logicalPlan, options.toMap)
+      logicalPlan, options.toMap, withSchemaEvolution = _withSchemaEvolution)
   }
 
   /** @inheritdoc */
@@ -207,7 +210,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T])
   private[sql] def overwriteCommand(condition: Column): LogicalPlan = {
     OverwriteByExpression.byName(
       UnresolvedRelation(tableName).requireWritePrivileges(Set(INSERT, DELETE)),
-      logicalPlan, expression(condition), options.toMap)
+      logicalPlan, expression(condition), options.toMap, _withSchemaEvolution)
   }
 
   /** @inheritdoc */
@@ -219,7 +222,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T])
   private[sql] def overwritePartitionsCommand(): LogicalPlan = {
     OverwritePartitionsDynamic.byName(
       UnresolvedRelation(tableName).requireWritePrivileges(Set(INSERT, DELETE)),
-      logicalPlan, options.toMap)
+      logicalPlan, options.toMap, _withSchemaEvolution)
   }
 
   /**
@@ -238,6 +241,9 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T])
   }
 
   private[sql] def replaceCommand(orCreate: Boolean): LogicalPlan = {
+    if (_withSchemaEvolution) {
+      throw QueryCompilationErrors.schemaEvolutionNotSupportedForReplaceTableWriteError()
+    }
     ReplaceTableAsSelect(
       UnresolvedIdentifier(tableName),
       partitioning.getOrElse(Seq.empty) ++ clustering,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamReader.scala
index f2a2e99fdc143..eb3120cac05aa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamReader.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.classic
 import scala.jdk.CollectionConverters._
 
 import org.apache.spark.annotation.{Evolving, Experimental}
-import org.apache.spark.sql.catalyst.analysis.{ChangelogInfoUtils, NamedStreamingRelation, RelationChanges, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.analysis.{ChangelogContextUtils, NamedStreamingRelation, RelationChanges, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.plans.logical.UnresolvedDataSource
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
 import org.apache.spark.sql.classic.ClassicConversions._
@@ -67,15 +67,9 @@ final class DataStreamReader private[sql](sparkSession: SparkSession)
     this
   }
 
-  /**
-   * Specifies a name for the streaming source. This name is used to identify the source
-   * in checkpoint metadata and enables stable checkpoint locations for source evolution.
-   *
-   * @param sourceName the name to assign to this streaming source
-   * @since 4.2.0
-   */
+  /** @inheritdoc */
   @Experimental
-  private[sql] def name(sourceName: String): this.type = {
+  override def name(sourceName: String): this.type = {
     validateSourceName(sourceName)
     this.userProvidedSourceName = Option(sourceName)
     this
@@ -123,10 +117,10 @@ final class DataStreamReader private[sql](sparkSession: SparkSession)
     assertNoSpecifiedSchema("changes")
     val identifier = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(tableName)
     val options = new CaseInsensitiveStringMap(extraOptions.toMap.asJava)
-    val changelogInfo = ChangelogInfoUtils.fromOptions(
+    val changelogContext = ChangelogContextUtils.fromOptions(
       options, sparkSession.sessionState.conf.sessionLocalTimeZone)
     val unresolved = UnresolvedRelation(identifier, options, isStreaming = true)
-    val changes = RelationChanges(unresolved, changelogInfo)
+    val changes = RelationChanges(unresolved, changelogContext)
     val plan = NamedStreamingRelation.withUserProvidedName(changes, userProvidedSourceName)
     Dataset.ofRows(sparkSession, plan)
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamWriter.scala
index 38483395ec8c5..75c3fc3e356e8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/DataStreamWriter.scala
@@ -28,7 +28,7 @@ import org.apache.spark.annotation.Evolving
 import org.apache.spark.api.java.function.VoidFunction2
 import org.apache.spark.sql.{streaming, Dataset => DS, ForeachWriter}
 import org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.catalyst.plans.logical.{ColumnDefinition, CreateTable, OptionList, UnresolvedTableSpec}
 import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
@@ -83,6 +83,13 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) extends streaming.D
     this
   }
 
+  /** @inheritdoc */
+  private[sql] def name(sinkName: String): this.type = {
+    validateSinkName(sinkName)
+    this.sinkName = Some(sinkName)
+    this
+  }
+
   /** @inheritdoc */
   def format(source: String): this.type = {
     this.source = source
@@ -190,7 +197,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) extends streaming.D
     val tableInstance = catalog.asTableCatalog.loadTable(identifier)
 
     def writeToV1Table(table: CatalogTable): StreamingQuery = {
-      if (table.tableType == CatalogTableType.VIEW) {
+      if (table.isViewLike) {
         throw QueryCompilationErrors.streamingIntoViewNotSupportedError(tableName)
       }
       require(table.provider.isDefined)
@@ -312,6 +319,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) extends streaming.D
 
     ds.sparkSession.sessionState.streamingQueryManager.startQuery(
       newOptions.get("queryName"),
+      sinkName,
       newOptions.get("checkpointLocation"),
       ds,
       newOptions.originalMap,
@@ -444,6 +452,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) extends streaming.D
   private var partitioningColumns: Option[Seq[String]] = None
 
   private var clusteringColumns: Option[Seq[String]] = None
+
+  private var sinkName: Option[String] = None
 }
 
 object DataStreamWriter {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala
index 5bef4e35ba57e..d83a4df51cd52 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala
@@ -764,6 +764,66 @@ class Dataset[T] private[sql](
     lateralJoin(right, Some(joinExprs), LateralJoinType(joinType))
   }
 
+  private[sql] def nearestByJoin(
+      right: sql.Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      joinType: JoinType,
+      approx: Boolean,
+      direction: NearestByDirection): DataFrame = {
+    if (numResults < 1 || numResults > NearestByJoin.MaxNumResults) {
+      throw new AnalysisException(
+        errorClass = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+        messageParameters = Map(
+          "numResults" -> numResults.toString,
+          "min" -> "1",
+          "max" -> NearestByJoin.MaxNumResults.toString))
+    }
+    withPlan {
+      NearestByJoin(
+        logicalPlan,
+        right.logicalPlan,
+        joinType,
+        approx,
+        numResults,
+        rankingExpression.expr,
+        direction)
+    }
+  }
+
+  /** @inheritdoc */
+  def nearestByJoin(
+      right: sql.Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      mode: String,
+      direction: String): DataFrame = {
+    nearestByJoin(
+      right,
+      rankingExpression,
+      numResults,
+      Inner,
+      NearestByJoinMode(mode),
+      NearestByDirection(direction))
+  }
+
+  /** @inheritdoc */
+  def nearestByJoin(
+      right: sql.Dataset[_],
+      rankingExpression: Column,
+      numResults: Int,
+      mode: String,
+      direction: String,
+      joinType: String): DataFrame = {
+    nearestByJoin(
+      right,
+      rankingExpression,
+      numResults,
+      NearestByJoinType(joinType),
+      NearestByJoinMode(mode),
+      NearestByDirection(direction))
+  }
+
   // TODO(SPARK-22947): Fix the DataFrame API.
   private[sql] def joinAsOf(
       other: Dataset[_],
@@ -2259,9 +2319,11 @@ class Dataset[T] private[sql](
    */
   private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = {
     SQLExecution.withNewExecutionId(qe, Some(name)) {
-      QueryExecution.withInternalError(s"""The "$name" action failed.""") {
-        qe.executedPlan.resetMetrics()
-        action(qe.executedPlan)
+      qe.withQueryExecutionId(sparkSession) {
+        QueryExecution.withInternalError(s"""The "$name" action failed.""") {
+          qe.executedPlan.resetMetrics()
+          action(qe.executedPlan)
+        }
       }
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/StreamingQueryManager.scala
index 72ae3b21d662a..fff8d32a0709b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/StreamingQueryManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/StreamingQueryManager.scala
@@ -176,6 +176,7 @@ class StreamingQueryManager private[sql] (
   // scalastyle:off argcount
   private def createQuery(
       userSpecifiedName: Option[String],
+      userSpecifiedSinkName: Option[String],
       userSpecifiedCheckpointLocation: Option[String],
       df: Dataset[_],
       extraOptions: Map[String, String],
@@ -207,6 +208,7 @@ class StreamingQueryManager private[sql] (
 
     val dataStreamWritePlan = WriteToStreamStatement(
       userSpecifiedName,
+      userSpecifiedSinkName,
       userSpecifiedCheckpointLocation,
       useTempCheckpointLocation,
       recoverFromCheckpointLocation,
@@ -277,6 +279,7 @@ class StreamingQueryManager private[sql] (
   @throws[TimeoutException]
   private[sql] def startQuery(
       userSpecifiedName: Option[String],
+      userSpecifiedSinkName: Option[String] = None,
       userSpecifiedCheckpointLocation: Option[String],
       df: Dataset[_],
       extraOptions: Map[String, String],
@@ -290,6 +293,7 @@ class StreamingQueryManager private[sql] (
       catalogTable: Option[CatalogTable] = None): StreamingQuery = {
     val query = createQuery(
       userSpecifiedName,
+      userSpecifiedSinkName,
       userSpecifiedCheckpointLocation,
       df,
       extraOptions,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
index 3f92f24156d3c..345c1d5d635f2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
@@ -35,11 +35,12 @@ import org.apache.spark.sql.classic.{Dataset, SparkSession}
 import org.apache.spark.sql.connector.catalog.CatalogPlugin
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, MultipartIdentifierHelper}
 import org.apache.spark.sql.connector.catalog.Identifier
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.columnar.InMemoryRelation
 import org.apache.spark.sql.execution.command.CommandUtils
 import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation, LogicalRelationWithTable}
-import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, ExtractV2CatalogAndIdentifier, ExtractV2Table, FileTable, V2TableRefreshUtil}
+import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation, ExtractV2CatalogAndIdentifier, ExtractV2Table, FileTable, V2TableRefreshUtil}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK
@@ -479,8 +480,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
     lookupCachedDataInternal(normalized)
   }
 
-  private def lookupCachedDataInternal(plan: LogicalPlan): Option[CachedData] = {
-    val result = cachedData.find(cd => plan.sameResult(cd.plan))
+  private def lookupCachedDataInternal(
+      plan: LogicalPlan,
+      canUse: CachedData => Boolean = _ => true): Option[CachedData] = {
+    val result = cachedData.find(cd => plan.sameResult(cd.plan) && canUse(cd))
     if (result.isDefined) {
       CacheManager.logCacheOperation(log"Dataframe cache hit for input plan:" +
         log"\n${MDC(QUERY_PLAN, plan)} matched with cache entry:" +
@@ -489,16 +492,40 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
     result
   }
 
+  // Decides whether the cached entry can be substituted into a plan being executed inside
+  // the given transaction. Collects only the scans whose table belongs to the transaction's
+  // catalog and asks the connector whether reusing the cached snapshot is compatible with its
+  // isolation contract. Note, this function has a side effect of mutating the read set of the
+  // connector.
+  private def validateCachedEntryForTransaction(cd: CachedData, txn: Transaction): Boolean = {
+    val txnCatalogName = txn.catalog().name()
+    val txnTables = cd.cachedRepresentation.cacheBuilder.logicalPlan.collectWithSubqueries {
+      case r: DataSourceV2Relation if r.catalog.exists(_.name() == txnCatalogName) => r.table
+    }.toSet
+    if (txnTables.isEmpty) return true
+
+    val scans = collectWithSubqueries(cd.cachedRepresentation.cacheBuilder.cachedPlan) {
+      case b: BatchScanExec if txnTables.contains(b.table) => b.scan
+    }
+    scans.nonEmpty && txn.registerScans(scans.toArray)
+  }
+
   /**
    * Replaces segments of the given logical plan with cached versions where possible. The input
    * plan must be normalized.
+   *
+   * @param plan   the plan to rewrite.
+   * @param canUse predicate filtering which cached entries are eligible for substitution.
+   *               Defaults to accepting any entry.
    */
-  private[sql] def useCachedData(plan: LogicalPlan): LogicalPlan = {
+  private[sql] def useCachedData(
+      plan: LogicalPlan,
+      canUse: CachedData => Boolean = _ => true): LogicalPlan = {
     val newPlan = plan transformDown {
       case command: Command => command
 
       case currentFragment =>
-        lookupCachedDataInternal(currentFragment).map { cached =>
+        lookupCachedDataInternal(currentFragment, canUse).map { cached =>
           // After cache lookup, we should still keep the hints from the input plan.
           val hints = EliminateResolvedHint.extractHintsFromPlan(currentFragment)._2
           val cachedPlan = cached.cachedRepresentation.withOutput(currentFragment.output)
@@ -511,7 +538,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
     }
 
     val result = newPlan.transformAllExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) {
-      case s: SubqueryExpression => s.withNewPlan(useCachedData(s.plan))
+      case s: SubqueryExpression => s.withNewPlan(useCachedData(s.plan, canUse))
     }
 
     if (result.fastEquals(plan)) {
@@ -527,6 +554,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
     result
   }
 
+  /** Transaction-aware variant of [[useCachedData]]. */
+  private[sql] def useCachedData(plan: LogicalPlan, txn: Transaction): LogicalPlan =
+    useCachedData(plan, validateCachedEntryForTransaction(_, txn))
+
   /**
    * Tries to re-cache all the cache entries that contain `resourcePath` in one or more
    * `HadoopFsRelation` node(s) as part of its logical plan.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
index 00b1f0248cd3e..e4e5936f276c0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
@@ -268,8 +268,7 @@ private object RowToColumnConverter {
       case LongType | TimestampType | TimestampNTZType | _: DayTimeIntervalType => LongConverter
       case DoubleType => DoubleConverter
       case StringType => StringConverter
-      case _: GeographyType => GeographyConverter
-      case _: GeometryType => GeometryConverter
+      case _: GeographyType | _: GeometryType => BinaryViewConverter
       case CalendarIntervalType => CalendarConverter
       case VariantType => VariantConverter
       case at: ArrayType => ArrayConverter(getConverterForType(at.elementType, at.containsNull))
@@ -341,16 +340,9 @@ private object RowToColumnConverter {
     }
   }
 
-  private object GeographyConverter extends TypeConverter {
+  private object BinaryViewConverter extends TypeConverter {
     override def append(row: SpecializedGetters, column: Int, cv: WritableColumnVector): Unit = {
-      val data = row.getGeography(column).getBytes
-      cv.appendByteArray(data, 0, data.length)
-    }
-  }
-
-  private object GeometryConverter extends TypeConverter {
-    override def append(row: SpecializedGetters, column: Int, cv: WritableColumnVector): Unit = {
-      val data = row.getGeometry(column).getBytes
+      val data = row.getBinaryView(column).getBytes
       cv.appendByteArray(data, 0, data.length)
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 2488b6aa51159..be7013188f2f9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.execution
 
+import java.util.Locale
 import java.util.concurrent.TimeUnit._
 
 import org.apache.hadoop.fs.Path
@@ -159,8 +160,11 @@ case class RowDataSourceScanExec(
 
   private def seqToString(seq: Seq[Any]): String = seq.mkString("[", ", ", "]")
 
-  private def pushedSampleMetadataString(s: TableSampleInfo): String =
-    s"SAMPLE (${(s.upperBound - s.lowerBound) * 100}) ${s.withReplacement} SEED(${s.seed})"
+  private def pushedSampleMetadataString(s: TableSampleInfo): String = {
+    val pct = (s.upperBound - s.lowerBound) * 100
+    val method = s.sampleMethod.toString.toUpperCase(Locale.ROOT)
+    s"$method SAMPLE ($pct) ${s.withReplacement} SEED(${s.seed})"
+  }
 
   override val metadata: Map[String, String] = {
     val markedFilters = if (filters.nonEmpty) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
index 927227325fbd7..6714510874351 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
@@ -163,11 +163,11 @@ object HiveResult extends SQLConfHelper {
     case (v: VariantVal, VariantType) => v.toString
     case (g: Geometry, dt: GeometryType) =>
       val internalGeom = STUtils.serializeGeomFromWKB(g, dt)
-      val s = STUtils.stAsEwkt(internalGeom).toString
+      val s = STUtils.stGeomAsEwkt(internalGeom).toString
       if (nested) "\"" + s + "\"" else s
     case (g: Geography, dt: GeographyType) =>
       val internalGeog = STUtils.serializeGeogFromWKB(g, dt)
-      val s = STUtils.stAsEwkt(internalGeog).toString
+      val s = STUtils.stGeogAsEwkt(internalGeog).toString
       if (nested) "\"" + s + "\"" else s
     case (other, u: UserDefinedType[_]) => u.stringifyValue(other)
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index f08b561d6ef9a..d843af844d94a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -26,24 +26,26 @@ import scala.util.control.NonFatal
 
 import org.apache.hadoop.fs.Path
 
-import org.apache.spark.SparkException
-import org.apache.spark.internal.Logging
+import org.apache.spark.{SparkContext, SparkException}
 import org.apache.spark.internal.LogKeys.EXTENDED_EXPLAIN_GENERATOR
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{AnalysisException, ExtendedExplainGenerator, Row}
 import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker}
-import org.apache.spark.sql.catalyst.analysis.{LazyExpression, NameParameterizedQuery, UnsupportedOperationChecker}
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, LazyExpression, NameParameterizedQuery, UnsupportedOperationChecker}
 import org.apache.spark.sql.catalyst.expressions.codegen.ByteCodeStats
 import org.apache.spark.sql.catalyst.plans.QueryPlan
-import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Command, CommandResult, CompoundBody, CreateTableAsSelect, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect, ReturnAnswer, Union, WithCTE}
+import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Command, CommandResult, CompoundBody, CreateTableAsSelect, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect, ReturnAnswer, Union, UnresolvedWith, WithCTE}
 import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule}
+import org.apache.spark.sql.catalyst.transactions.TransactionUtils
 import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat
 import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.classic.SparkSession
+import org.apache.spark.sql.connector.catalog.LookupCatalog
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
 import org.apache.spark.sql.execution.SQLExecution.EXECUTION_ROOT_ID_KEY
 import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, InsertAdaptiveSparkPlan}
 import org.apache.spark.sql.execution.bucketing.{CoalesceBucketsInJoin, DisableUnnecessaryBucketedScan}
-import org.apache.spark.sql.execution.datasources.v2.V2TableRefreshUtil
+import org.apache.spark.sql.execution.datasources.v2.{TransactionalExec, V2TableRefreshUtil}
 import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters
 import org.apache.spark.sql.execution.exchange.EnsureRequirements
 import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery
@@ -69,7 +71,12 @@ class QueryExecution(
     val mode: CommandExecutionMode.Value = CommandExecutionMode.ALL,
     val shuffleCleanupModeOpt: Option[ShuffleCleanupMode] = None,
     val refreshPhaseEnabled: Boolean = true,
-    val queryId: UUID = UUIDv7Generator.generate()) extends Logging {
+    val queryId: UUID = UUIDv7Generator.generate(),
+    // When a transaction is active, callers creating nested QueryExecution instances MUST pass
+    // the enclosing QueryExecution's analyzer here to propagate the transaction context.
+    // Omitting it causes the nested QE to use sessionState.analyzer, which has no knowledge
+    // of the transaction and will load tables outside the transaction's catalog scope.
+    val analyzerOpt: Option[Analyzer] = None) extends LookupCatalog {
 
   val id: Long = QueryExecution.nextExecutionId
 
@@ -79,6 +86,8 @@ class QueryExecution(
   // TODO: Move the planner an optimizer into here from SessionState.
   protected def planner = sparkSession.sessionState.planner
 
+  protected val catalogManager = sparkSession.sessionState.catalogManager
+
   /**
    * Check whether the query represented by this QueryExecution is a SQL script.
    * @return True if the query is a SQL script, False otherwise.
@@ -90,6 +99,53 @@ class QueryExecution(
     logical.exists(_.expressions.exists(_.exists(_.isInstanceOf[LazyExpression])))
   }
 
+
+  // 1. At the pre-Analyzed plan we look for nodes that implement the TransactionalWrite trait.
+  //    When a plan contains such a node we initiate a transaction. Note, we should never start
+  //    a transaction for operations that are not executed, e.g. EXPLAIN.
+  // 2. Create an analyzer clone with a transaction aware Catalog Manager. The latter is the
+  //    narrow waist of all catalog accesses, and it is also the transaction context carrier.
+  //    This is then passed to all rules during analysis that need to check the catalog. Rules
+  //    that are specifically interested in transactionality can access the transaction directly
+  //    from the Catalog Manager. The transaction catalog, is potentially the place where connectors
+  //    should keep state about the reads (tables+predicates) that occurred during the transaction.
+  // 3. The analyzer instance is passed to nested Query Execution instances. These need to respect
+  //    the open transaction instead of creating their own.
+  private val lazyTransactionOpt = LazyTry {
+    // Always inherit an active transaction from the outer analyzer, regardless of mode.
+    analyzerOpt.flatMap(_.catalogManager.transaction).orElse {
+      // Only begin a new transaction for outer QEs that lead to execution.
+      if (mode != CommandExecutionMode.SKIP) {
+        val catalog = logical match {
+          case UnresolvedWith(TransactionalWrite(c), _, _) => Some(c)
+          case TransactionalWrite(c) => Some(c)
+          case _ => None
+        }
+        catalog.map(TransactionUtils.beginTransaction)
+      } else {
+        None
+      }
+    }
+  }
+  private def transactionOpt: Option[Transaction] = lazyTransactionOpt.get
+
+  // Per-query analyzer: uses a transaction-aware CatalogManager when a transaction is active,
+  // so that all catalog lookups and rule applications during analysis see the correct state
+  // without relying on thread-local context. Any nested QueryExecution that is created during
+  // analysis or execution of a transactional plan must receive this analyzer via analyzerOpt.
+  private val lazyAnalyzer = LazyTry {
+    analyzerOpt.getOrElse {
+      transactionOpt match {
+        case Some(txn) =>
+          sparkSession.sessionState.analyzer.withCatalogManager(
+            catalogManager.withTransaction(txn))
+        case None =>
+          sparkSession.sessionState.analyzer
+      }
+    }
+  }
+  private def analyzer: Analyzer = lazyAnalyzer.get
+
   def assertAnalyzed(): Unit = {
     try {
       analyzed
@@ -102,7 +158,7 @@ class QueryExecution(
     }
   }
 
-  def assertSupported(): Unit = {
+  def assertSupported(): Unit = withAbortTransactionOnFailure {
     if (sparkSession.sessionState.conf.isUnsupportedOperationCheckEnabled) {
       UnsupportedOperationChecker.checkForBatch(analyzed)
     }
@@ -141,7 +197,7 @@ class QueryExecution(
     try {
       val plan = executePhase(QueryPlanningTracker.ANALYSIS) {
         // We can't clone `logical` here, which will reset the `_analyzed` flag.
-        sparkSession.sessionState.analyzer.executeAndCheck(sqlScriptExecuted, tracker)
+        analyzer.executeAndCheck(sqlScriptExecuted, tracker)
       }
       tracker.setAnalyzed(plan)
       plan
@@ -152,7 +208,9 @@ class QueryExecution(
     }
   }
 
-  def analyzed: LogicalPlan = lazyAnalyzed.get
+  def analyzed: LogicalPlan = withAbortTransactionOnFailure {
+    lazyAnalyzed.get
+  }
 
   private val lazyCommandExecuted = LazyTry {
     mode match {
@@ -162,7 +220,9 @@ class QueryExecution(
     }
   }
 
-  def commandExecuted: LogicalPlan = lazyCommandExecuted.get
+  def commandExecuted: LogicalPlan = withAbortTransactionOnFailure {
+    lazyCommandExecuted.get
+  }
 
   private def commandExecutionName(command: Command): String = command match {
     case _: CreateTableAsSelect => "create"
@@ -184,7 +244,7 @@ class QueryExecution(
       // for eagerly executed commands we mark this place as beginning of execution.
       tracker.setReadyForExecution()
       val (qe, result) = QueryExecution.runCommand(
-        sparkSession, p, name, refreshPhaseEnabled, mode, Some(shuffleCleanupMode))
+        sparkSession, p, name, refreshPhaseEnabled, mode, Some(shuffleCleanupMode), Some(analyzer))
       CommandResult(
         qe.analyzed.output,
         qe.commandExecuted,
@@ -222,19 +282,29 @@ class QueryExecution(
   }
 
   // The plan that has been normalized by custom rules, so that it's more likely to hit cache.
-  def normalized: LogicalPlan = lazyNormalized.get
+  def normalized: LogicalPlan = withAbortTransactionOnFailure {
+    lazyNormalized.get
+  }
 
   private val lazyWithCachedData = LazyTry {
     sparkSession.withActive {
       assertAnalyzed()
       assertSupported()
-      // clone the plan to avoid sharing the plan instance between different stages like analyzing,
-      // optimizing and planning.
-      sparkSession.sharedState.cacheManager.useCachedData(normalized.clone())
+
+      // Clone the plan to avoid sharing the plan instance between different stages like
+      // analyzing, optimizing and planning.
+      val planToRewrite = normalized.clone()
+      val cacheManager = sparkSession.sharedState.cacheManager
+      transactionOpt match {
+        case Some(txn) => cacheManager.useCachedData(planToRewrite, txn)
+        case None => cacheManager.useCachedData(planToRewrite)
+      }
     }
   }
 
-  def withCachedData: LogicalPlan = lazyWithCachedData.get
+  def withCachedData: LogicalPlan = withAbortTransactionOnFailure {
+    lazyWithCachedData.get
+  }
 
   def assertCommandExecuted(): Unit = commandExecuted
 
@@ -256,7 +326,9 @@ class QueryExecution(
     }
   }
 
-  def optimizedPlan: LogicalPlan = lazyOptimizedPlan.get
+  def optimizedPlan: LogicalPlan = withAbortTransactionOnFailure {
+    lazyOptimizedPlan.get
+  }
 
   def assertOptimized(): Unit = optimizedPlan
 
@@ -264,14 +336,17 @@ class QueryExecution(
     // We need to materialize the optimizedPlan here because sparkPlan is also tracked under
     // the planning phase
     assertOptimized()
-    executePhase(QueryPlanningTracker.PLANNING) {
+    val plan = executePhase(QueryPlanningTracker.PLANNING) {
       // Clone the logical plan here, in case the planner rules change the states of the logical
       // plan.
       QueryExecution.createSparkPlan(planner, optimizedPlan.clone())
     }
+    attachTransaction(plan)
   }
 
-  def sparkPlan: SparkPlan = lazySparkPlan.get
+  def sparkPlan: SparkPlan = withAbortTransactionOnFailure {
+    lazySparkPlan.get
+  }
 
   def assertSparkPlanPrepared(): Unit = sparkPlan
 
@@ -292,7 +367,9 @@ class QueryExecution(
 
   // executedPlan should not be used to initialize any SparkPlan. It should be
   // only used for execution.
-  def executedPlan: SparkPlan = lazyExecutedPlan.get
+  def executedPlan: SparkPlan = withAbortTransactionOnFailure {
+    lazyExecutedPlan.get
+  }
 
   def assertExecutedPlanPrepared(): Unit = executedPlan
 
@@ -310,7 +387,9 @@ class QueryExecution(
    * Given QueryExecution is not a public class, end users are discouraged to use this: please
    * use `Dataset.rdd` instead where conversion will be applied.
    */
-  def toRdd: RDD[InternalRow] = lazyToRdd.get
+  def toRdd: RDD[InternalRow] = withAbortTransactionOnFailure {
+    lazyToRdd.get
+  }
 
   private val observedMetricsLock = new Object
 
@@ -327,7 +406,30 @@ class QueryExecution(
 
   protected def executePhase[T](phase: String)(block: => T): T = sparkSession.withActive {
     QueryExecution.withInternalError(s"The Spark SQL phase $phase failed with an internal error.") {
-      tracker.measurePhase(phase)(block)
+      withQueryExecutionId(sparkSession) {
+        tracker.measurePhase(phase)(block)
+      }
+    }
+  }
+
+  /**
+   * Set the query execution id in thread-local properties while
+   * executing the block. This is used by
+   * [[org.apache.spark.sql.execution.metric.SQLLastAttemptAccumulator]] to associate
+   * driver-side metric updates with a specific QueryExecution.
+   */
+  private[sql] def withQueryExecutionId[T](
+      session: SparkSession)(block: => T): T = {
+    val sc = session.sparkContext
+    val oldId = sc.getLocalProperty(
+      SparkContext.DATASET_QUERY_EXECUTION_ID_KEY)
+    sc.setLocalProperty(
+      SparkContext.DATASET_QUERY_EXECUTION_ID_KEY, id.toString)
+    try {
+      block
+    } finally {
+      sc.setLocalProperty(
+        SparkContext.DATASET_QUERY_EXECUTION_ID_KEY, oldId)
     }
   }
 
@@ -367,17 +469,24 @@ class QueryExecution(
     }
   }
 
+  /**
+   * Returns the QueryExecution to use when generating an explain string.
+   * Overridden by IncrementalExecution to reuse `this` so that the already-open transaction and
+   * cached executedPlan are not duplicated.
+   */
+  protected def queryExecutionForExplain: QueryExecution = if (logical.isStreaming) {
+    // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the
+    // output mode does not matter since there is no `Sink`.
+    new IncrementalExecution(
+      sparkSession, logical, OutputMode.Append(), "<unknown>",
+      UUID.randomUUID, UUID.randomUUID, 0, None, OffsetSeqMetadata(0, 0),
+      WatermarkPropagator.noop(), false, mode = this.mode)
+  } else {
+    this
+  }
+
   private def explainString(mode: ExplainMode, maxFields: Int, append: String => Unit): Unit = {
-    val queryExecution = if (logical.isStreaming) {
-      // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the
-      // output mode does not matter since there is no `Sink`.
-      new IncrementalExecution(
-        sparkSession, logical, OutputMode.Append(), "<unknown>",
-        UUID.randomUUID, UUID.randomUUID, 0, None, OffsetSeqMetadata(0, 0),
-        WatermarkPropagator.noop(), false, mode = this.mode)
-    } else {
-      this
-    }
+    val queryExecution = queryExecutionForExplain
 
     mode match {
       case SimpleMode =>
@@ -512,6 +621,26 @@ class QueryExecution(
     }
   }
 
+  /**
+   * Runs the given block, aborting the active transaction if an exception is thrown.
+   * If no transaction is active, the block is executed as-is.
+   */
+  private def withAbortTransactionOnFailure[T](block: => T): T = transactionOpt match {
+    case Some(transaction) =>
+      try block
+      catch { case e: Throwable => TransactionUtils.abort(transaction); throw e }
+    case None => block
+  }
+
+
+  /** Attaches a transaction to the given SparkPlan to the transactional execution nodes. */
+  private def attachTransaction(plan: SparkPlan): SparkPlan = transactionOpt match {
+    case Some(txn) => plan.transformDown {
+      case w: TransactionalExec => w.withTransaction(Some(txn))
+    }
+    case None => plan
+  }
+
   /** A special namespace for commands that can be used to debug query execution. */
   // scalastyle:off
   object debug {
@@ -796,14 +925,16 @@ object QueryExecution {
       name: String,
       refreshPhaseEnabled: Boolean = true,
       mode: CommandExecutionMode.Value = CommandExecutionMode.SKIP,
-      shuffleCleanupModeOpt: Option[ShuffleCleanupMode] = None)
+      shuffleCleanupModeOpt: Option[ShuffleCleanupMode] = None,
+      analyzerOpt: Option[Analyzer] = None)
     : (QueryExecution, Array[InternalRow]) = {
     val qe = new QueryExecution(
       sparkSession,
       command,
       mode = mode,
       shuffleCleanupModeOpt = shuffleCleanupModeOpt,
-      refreshPhaseEnabled = refreshPhaseEnabled)
+      refreshPhaseEnabled = refreshPhaseEnabled,
+      analyzerOpt = analyzerOpt)
     val result = QueryExecution.withInternalError(s"Executed $name failed.") {
       SQLExecution.withNewExecutionId(qe, Some(name)) {
         qe.executedPlan.executeCollect()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala
index 46b2cde2eed95..351b3c009419a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala
@@ -154,9 +154,12 @@ case class SortExec(
       forceInline = true)
 
     val addToSorter = ctx.freshName("addToSorter")
+    // Pass `partitionIndex` as a parameter so bare references in the child's
+    // produce resolve to the local, not the protected superclass field.
+    // Required when `addNewFunction` spills this helper to a nested class.
     val addToSorterFuncName = ctx.addNewFunction(addToSorter,
       s"""
-        | private void $addToSorter() throws java.io.IOException {
+        | private void $addToSorter(int partitionIndex) throws java.io.IOException {
         |   ${child.asInstanceOf[CodegenSupport].produce(ctx, this)}
         | }
       """.stripMargin.trim)
@@ -169,7 +172,7 @@ case class SortExec(
     s"""
        | if ($needToSort) {
        |   long $spillSizeBefore = $metrics.memoryBytesSpilled();
-       |   $addToSorterFuncName();
+       |   $addToSorterFuncName(partitionIndex);
        |   $sortedIterator = $sorterVariable.sort();
        |   $sortTime.add($sorterVariable.getSortTimeNanos() / $NANOS_PER_MILLIS);
        |   $peakMemory.add($sorterVariable.getPeakMemoryUsage());
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index 7f94cc77f3454..4f8f66eb59690 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -249,12 +249,21 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
     doExecuteWrite(writeFilesSpec)
   }
 
+  /**
+   * A deterministic scope ID for RDDs created by this SparkPlan,
+   * used by LastAttemptAccumulator to track which RDD belongs
+   * to which SparkPlan node.
+   */
+  private[spark] def rddScopeId: String =
+    "spark_plan_" + id.toString
+
   /**
    * Executes a query after preparing the query and adding query plan information to created RDDs
    * for visualization.
    */
   protected final def executeQuery[T](query: => T): T = {
-    RDDOperationScope.withScope(sparkContext, nodeName, false, true) {
+    RDDOperationScope.withScope(
+        sparkContext, nodeName, false, true, rddScopeId) {
       prepare()
       waitForSubqueries()
       query
@@ -268,6 +277,8 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
   @transient
   private val runningSubqueries = new ArrayBuffer[ExecSubqueryExpression]
 
+  @transient private val prepareLock = new Object()
+
   /**
    * Finds scalar subquery expressions in this plan node and starts evaluating them.
    */
@@ -284,7 +295,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
   /**
    * Blocks the thread until all subqueries finish evaluation and update the results.
    */
-  protected def waitForSubqueries(): Unit = synchronized {
+  protected def waitForSubqueries(): Unit = prepareLock.synchronized {
     // fill in the result of subqueries
     runningSubqueries.foreach { sub =>
       sub.updateResult()
@@ -303,7 +314,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
   final def prepare(): Unit = {
     // doPrepare() may depend on it's children, we should call prepare() on all the children first.
     children.foreach(_.prepare())
-    synchronized {
+    prepareLock.synchronized {
       if (!prepared) {
         prepareSubqueries()
         doPrepare()
@@ -320,7 +331,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
    * @note `prepare` method has already walked down the tree, so the implementation doesn't have
    * to call children's `prepare` methods.
    *
-   * This will only be called once, protected by `this`.
+   * This will only be called once, protected by [[prepareLock]].
    */
   protected def doPrepare(): Unit = {}
 
@@ -375,6 +386,11 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
    */
   private def getByteArrayRdd(
       n: Int = -1, takeFromEnd: Boolean = false): RDD[(Long, ChunkedByteBuffer)] = {
+    // Wrap in the plan's RDD scope so that the wrapper RDD created by mapPartitionsInternal
+    // inherits this plan's deterministic scope ID rather than getting an anonymous auto-generated
+    // one.
+    val rdd = RDDOperationScope.withScope(
+        sparkContext, nodeName, false, true, rddScopeId) {
     execute().mapPartitionsInternal { iter =>
       var count = 0
       val buffer = new Array[Byte](4 << 10)  // 4K
@@ -409,8 +425,10 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
       out.writeInt(-1)
       out.flush()
       out.close()
-      Iterator((count, cbbos.toChunkedByteBuffer))
+      Iterator((count.toLong, cbbos.toChunkedByteBuffer))
+    }
     }
+    rdd
   }
 
   /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index b910a3dd6d8aa..0ecbf7609d2b6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CurrentNamespace,
   GlobalTempView, LocalTempView, PersistedView,
   PlanWithUnresolvedIdentifier, SchemaEvolution, SchemaTypeEvolution, UnresolvedAttribute,
-  UnresolvedIdentifier, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedProcedure}
+  UnresolvedIdentifier, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedProcedure,
+  UnresolvedTableOrViewSearchPathMode}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
 import org.apache.spark.sql.catalyst.parser._
@@ -45,6 +46,7 @@ import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
+import org.apache.spark.sql.metricview.logical.CreateMetricView
 import org.apache.spark.sql.types.{DataType, StringType}
 import org.apache.spark.util.Utils.getUriBuilder
 
@@ -358,15 +360,7 @@ class SparkSqlAstBuilder extends AstBuilder {
    * }}}
    */
   override def visitSetPath(ctx: SetPathContext): LogicalPlan = withOrigin(ctx) {
-    val elements = ctx.pathElement().asScala.map { pe =>
-      if (pe.DEFAULT_PATH() != null) PathElement.DefaultPath
-      else if (pe.SYSTEM_PATH() != null) PathElement.SystemPath
-      else if (pe.PATH() != null) PathElement.PathRef
-      else if (pe.CURRENT_DATABASE() != null) PathElement.CurrentDatabase
-      else if (pe.CURRENT_SCHEMA() != null) PathElement.CurrentSchema
-      else PathElement.SchemaInPath(visitMultipartIdentifier(pe.multipartIdentifier()))
-    }.toSeq
-    SetPathCommand(elements)
+    SetPathCommand(ctx.pathElement().asScala.map(visitPathElement).toSeq)
   }
 
   /**
@@ -870,7 +864,7 @@ class SparkSqlAstBuilder extends AstBuilder {
       .getOrElse(Map.empty)
     val codeLiteral = visitCodeLiteral(ctx.codeLiteral())
 
-    CreateMetricViewCommand(
+    CreateMetricView(
       withIdentClause(ctx.identifierReference(), UnresolvedIdentifier(_)),
       userSpecifiedColumns,
       visitCommentSpecList(ctx.commentSpec()),
@@ -1450,7 +1444,11 @@ class SparkSqlAstBuilder extends AstBuilder {
       val tableName = ctx.identifierReference.getText.split("\\.").lastOption.getOrElse("table")
       throw QueryCompilationErrors.describeJsonNotExtendedError(tableName)
     }
-    val relation = createUnresolvedTableOrView(ctx.identifierReference, "DESCRIBE TABLE")
+    val relation = createUnresolvedTableOrView(
+      ctx.identifierReference,
+      "DESCRIBE TABLE",
+      allowTempView = true,
+      UnresolvedTableOrViewSearchPathMode.QueryLike)
     if (ctx.describeColName != null) {
       if (ctx.partitionSpec != null) {
         throw QueryParsingErrors.descColumnForPartitionUnsupportedError(ctx)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index 919c1b97fd0f7..92818c12bfa09 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -328,7 +328,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
             .getOrElse(createJoinWithoutHint())
         }
 
-      case j @ ExtractSingleColumnNullAwareAntiJoin(leftKeys, rightKeys) =>
+      case j @ ExtractSingleColumnNullAwareAntiJoin(leftKeys, rightKeys)
+          if canBroadcastBySize(j.right, conf) =>
         Seq(joins.BroadcastHashJoinExec(leftKeys, rightKeys, LeftAnti, BuildRight,
           None, planLater(j.left), planLater(j.right), isNullAwareAntiJoin = true))
 
@@ -1039,7 +1040,14 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         execution.FilterExec(f.typedCondition(f.deserializer), planLater(f.child)) :: Nil
       case e @ logical.Expand(_, _, child) =>
         execution.ExpandExec(e.projections, e.output, planLater(child)) :: Nil
-      case logical.Sample(lb, ub, withReplacement, seed, child) =>
+      case logical.Sample(lb, ub, withReplacement, seed, child, sampleMethod) =>
+        if (sampleMethod == logical.SampleMethod.System) {
+          // V2ScanRelationPushDown is non-excludable and always handles SYSTEM samples
+          // (either pushes down or throws). Reaching here indicates an internal invariant
+          // violation.
+          throw SparkException.internalError(
+            "TABLESAMPLE SYSTEM node was not properly handled by V2ScanRelationPushDown.")
+        }
         execution.SampleExec(lb, ub, withReplacement, seed, planLater(child)) :: Nil
       case logical.LocalRelation(output, data, _, stream) =>
         LocalTableScanExec(output, data, stream) :: Nil
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index dbe01f1baaa75..dcfa9a399df71 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -117,6 +117,13 @@ trait CodegenSupport extends SparkPlan {
    *     # call consume(), which will call parent.doConsume()
    *      if (shouldStop()) return;
    *   }
+   *
+   * If the emitted code depends on the partition index, read it from
+   * `ctx.currentPartitionIndexVar` rather than hard-coding the field name
+   * `partitionIndex`. `UnionExec` rebinds `currentPartitionIndexVar` per
+   * fused child so that each child sees its own child-local index; a
+   * hard-coded reference would silently observe the global `UnionRDD` index
+   * under fusion.
    */
   protected def doProduce(ctx: CodegenContext): String
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQETestHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQETestHelper.scala
new file mode 100644
index 0000000000000..7e78ee41900ce
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQETestHelper.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.adaptive
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.util.{AccumulatorContext, Utils}
+
+/** Testing only helpers for AQE. */
+object AQETestHelper {
+  // See [withForcedCancellation].
+  @volatile private var metricIdsForForcedCancellation: Set[Long] = Set.empty
+
+  /**
+   * Set `triggerMetrics` to induce a forced cancellation into the execution when any of the
+   * metrics is non-empty.
+   * In this case the results will be discarded and the stage re-run, causing the metrics to be
+   * incremented again.
+   */
+  def withForcedCancellation[T](triggerMetrics: SQLMetric*)(thunk: => T): T = {
+    metricIdsForForcedCancellation = triggerMetrics.map(_.id).toSet
+    val res = try {
+      thunk
+    } finally {
+      metricIdsForForcedCancellation = Set.empty
+      forcedCancellationTriggeredForPlans.clear()
+    }
+    res
+  }
+
+  /*
+   * Track for which plans we have already triggered the forced replanning so we only do it once.
+   */
+  private val forcedCancellationTriggeredForPlans = mutable.HashSet.empty[Int]
+
+  /** Return `true` if forced cancellation mechanism is enabled. */
+  def isForcedCancellationEnabled: Boolean =
+    Utils.isTesting && metricIdsForForcedCancellation.nonEmpty
+
+  /** Return `true` if forced cancellation has already been triggered for `plan`. */
+  private def wasForcedCancellationTriggeredForPlan(plan: SparkPlan): Boolean = synchronized {
+    forcedCancellationTriggeredForPlans.contains(plan.id)
+  }
+
+  /** Mark that force cancellation was successfully triggered for `plan`. */
+  def markForcedCancellationTriggeredForPlan(plan: SparkPlan): Unit = synchronized {
+    assert(!forcedCancellationTriggeredForPlans.contains(plan.id),
+      "A plan was forced to cancel a second time.")
+    forcedCancellationTriggeredForPlans += plan.id
+  }
+
+  /** Return `true` if we should try to force cancellation for `plan` at this point. */
+  def shouldForceCancellation(plan: SparkPlan): Boolean = {
+    // Trigger the forced cancellation only if we are in testing
+    Utils.isTesting &&
+      // ...and if we haven't triggered it yet
+      !wasForcedCancellationTriggeredForPlan(plan) &&
+      // ...and if any of the trigger metrics > 0.
+      metricIdsForForcedCancellation.exists { id =>
+        AccumulatorContext.get(id).map(!_.isZero).getOrElse(false)
+      }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
index 4840016bf745d..112ee82314c4b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
@@ -340,6 +340,8 @@ case class AdaptiveSparkPlanExec(
         if (errors.nonEmpty) {
           cleanUpAndThrowException(errors.toSeq, None)
         }
+        val testTriggerForceCancellation = AQETestHelper.shouldForceCancellation(this)
+
         if (!currentPhysicalPlan.isInstanceOf[ResultQueryStageExec]) {
           // Try re-optimizing and re-planning. Adopt the new plan if its cost is equal to or less
           // than that of the current plan; otherwise keep the current physical plan together with
@@ -352,14 +354,21 @@ case class AdaptiveSparkPlanExec(
           // the current physical plan. Once a new plan is adopted and both logical and physical
           // plans are updated, we can clear the query stage list because at this point the two
           // plans are semantically and physically in sync again.
-          val logicalPlan = replaceWithQueryStagesInLogicalPlan(currentLogicalPlan, stagesToReplace)
+          var logicalPlan = replaceWithQueryStagesInLogicalPlan(currentLogicalPlan, stagesToReplace)
+          if (testTriggerForceCancellation) {
+            // Force unwrap all LogicalQueryStage so they get replanned.
+            logicalPlan = logicalPlan.transformDown {
+              case LogicalQueryStage(logical, _) => logical
+            }
+          }
           val afterReOptimize = reOptimize(logicalPlan)
           if (afterReOptimize.isDefined) {
             val (newPhysicalPlan, newLogicalPlan) = afterReOptimize.get
             val origCost = costEvaluator.evaluateCost(currentPhysicalPlan)
             val newCost = costEvaluator.evaluateCost(newPhysicalPlan)
             if (newCost < origCost ||
-              (newCost == origCost && currentPhysicalPlan != newPhysicalPlan)) {
+              (newCost == origCost && currentPhysicalPlan != newPhysicalPlan) ||
+              testTriggerForceCancellation) {
               lazy val plans = sideBySide(
                 currentPhysicalPlan.treeString, newPhysicalPlan.treeString).mkString("\n")
               logOnLevel(log"Plan changed:\n${MDC(QUERY_PLAN, plans)}")
@@ -369,6 +378,9 @@ case class AdaptiveSparkPlanExec(
               stagesToReplace = Seq.empty[QueryStageExec]
             }
           }
+          if (testTriggerForceCancellation) {
+            AQETestHelper.markForcedCancellationTriggeredForPlan(this)
+          }
         }
         // Now that some stages have finished, we can try creating new stages.
         result = createQueryStages(fun, currentPhysicalPlan, firstRun = false)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala
index 2556edee8d02f..eea664b29fd52 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala
@@ -94,6 +94,21 @@ trait AdaptiveSparkPlanHelper {
     collect(p) { case plan if allChildren(plan).isEmpty => plan }
   }
 
+  /**
+   * Returns true if the condition specified by `f` is satisfied by any node in this tree.
+   */
+  def exists(p: SparkPlan)(f: SparkPlan => Boolean): Boolean = {
+    find(p)(f).isDefined
+  }
+
+  /**
+   * Like [[exists]], but also considers plan nodes inside subqueries.
+   */
+  def existsWithSubqueries(
+      p: SparkPlan)(f: SparkPlan => Boolean): Boolean = {
+    exists(p)(f) || subqueriesAll(p).exists(exists(_)(f))
+  }
+
   /**
    * Finds and returns the first [[SparkPlan]] of the tree for which the given partial function
    * is defined (pre-order), and applies the partial function to it.
@@ -138,3 +153,5 @@ trait AdaptiveSparkPlanHelper {
     case other => other
   }
 }
+
+private[sql] object AdaptiveSparkPlanHelper extends AdaptiveSparkPlanHelper
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
index d59fe8222c7ff..4d33ed81641d9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
@@ -21,9 +21,10 @@ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
 import org.apache.spark.sql.catalyst.planning.{ExtractEquiJoinKeys, ExtractSingleColumnNullAwareAntiJoin}
 import org.apache.spark.sql.catalyst.plans.LeftAnti
 import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.physical.{BroadcastPartitioning, IdentityBroadcastMode}
 import org.apache.spark.sql.classic.Strategy
 import org.apache.spark.sql.execution.{joins, SparkPlan}
-import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec}
+import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, HashedRelationBroadcastMode}
 
 /**
  * Strategy for plans containing [[LogicalQueryStage]] nodes:
@@ -36,28 +37,52 @@ import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNes
  */
 object LogicalQueryStageStrategy extends Strategy {
 
-  private def isBroadcastStage(plan: LogicalPlan): Boolean = plan match {
-    case LogicalQueryStage(_, _: BroadcastQueryStageExec) => true
+  private def isBroadcastStageWithHashedBroadcastMode(
+      plan: LogicalPlan,
+      isNullAware: Boolean): Boolean = plan match {
+    case LogicalQueryStage(_, bqs: BroadcastQueryStageExec) =>
+      bqs.broadcast.outputPartitioning match {
+        case BroadcastPartitioning(HashedRelationBroadcastMode(_, stageIsNullAware)) =>
+          stageIsNullAware == isNullAware
+        case _ => false
+      }
+    case _ => false
+  }
+
+  private def isBroadcastStageWithIdentityBroadcastMode(plan: LogicalPlan): Boolean = plan match {
+    case LogicalQueryStage(_, bqs: BroadcastQueryStageExec) =>
+      bqs.broadcast.outputPartitioning match {
+        case BroadcastPartitioning(IdentityBroadcastMode) => true
+        case _ => false
+      }
     case _ => false
   }
 
   def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
     case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, otherCondition, _,
           left, right, hint)
-        if isBroadcastStage(left) || isBroadcastStage(right) =>
-      val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight
+        if isBroadcastStageWithHashedBroadcastMode(left, isNullAware = false) ||
+            isBroadcastStageWithHashedBroadcastMode(right, isNullAware = false) =>
+      val buildSide =
+        if (isBroadcastStageWithHashedBroadcastMode(left, isNullAware = false)) {
+          BuildLeft
+        } else {
+          BuildRight
+        }
       Seq(BroadcastHashJoinExec(
         leftKeys, rightKeys, joinType, buildSide, otherCondition, planLater(left),
         planLater(right)))
 
     case j @ ExtractSingleColumnNullAwareAntiJoin(leftKeys, rightKeys)
-        if isBroadcastStage(j.right) =>
+        if isBroadcastStageWithHashedBroadcastMode(j.right, isNullAware = true) =>
       Seq(joins.BroadcastHashJoinExec(leftKeys, rightKeys, LeftAnti, BuildRight,
         None, planLater(j.left), planLater(j.right), isNullAwareAntiJoin = true))
 
     case j @ Join(left, right, joinType, condition, _)
-        if isBroadcastStage(left) || isBroadcastStage(right) =>
-      val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight
+        if isBroadcastStageWithIdentityBroadcastMode(left) ||
+            isBroadcastStageWithIdentityBroadcastMode(right) =>
+      val buildSide =
+        if (isBroadcastStageWithIdentityBroadcastMode(left)) BuildLeft else BuildRight
       BroadcastNestedLoopJoinExec(
         planLater(left), planLater(right), buildSide, joinType, condition) :: Nil
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala
index 595cecabe9185..352388a6d8a10 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala
@@ -150,9 +150,12 @@ trait AggregateCodegenSupport
     }
 
     val doAgg = ctx.freshName("doAggregateWithoutKey")
+    // Pass `partitionIndex` as a parameter so bare references in the child's
+    // produce resolve to the local, not the protected superclass field.
+    // Required when `addNewFunction` spills this helper to a nested class.
     val doAggFuncName = ctx.addNewFunction(doAgg,
       s"""
-         |private void $doAgg() throws java.io.IOException {
+         |private void $doAgg(int partitionIndex) throws java.io.IOException {
          |  // initialize aggregation buffer
          |  $initBufVar
          |
@@ -167,11 +170,11 @@ trait AggregateCodegenSupport
         val beforeAgg = ctx.freshName("beforeAgg")
         s"""
            |long $beforeAgg = System.nanoTime();
-           |$doAggFuncName();
+           |$doAggFuncName(partitionIndex);
            |$aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS);
          """.stripMargin
       } else {
-        s"$doAggFuncName();"
+        s"$doAggFuncName(partitionIndex);"
       }
 
     s"""
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index 4a0db1c613d60..7b3f9ec9951e4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -526,9 +526,15 @@ case class HashAggregateExec(
       finishRegularHashMap
     }
 
+    // `partitionIndex` is passed as a parameter so any bare `partitionIndex`
+    // reference in the child's produce resolves to the local parameter, not
+    // the protected `BufferedRowIterator.partitionIndex` field. When
+    // `addNewFunction` spills this helper into a nested class (as can happen
+    // once the outer class passes the code-size threshold), the bare field
+    // reference fails with `IllegalAccessError`.
     val doAggFuncName = ctx.addNewFunction(doAgg,
       s"""
-         |private void $doAgg() throws java.io.IOException {
+         |private void $doAgg(int partitionIndex) throws java.io.IOException {
          |  ${child.asInstanceOf[CodegenSupport].produce(ctx, this)}
          |  $finishHashMap
          |}
@@ -615,7 +621,7 @@ case class HashAggregateExec(
        |  $addHookToCloseFastHashMap
        |  $hashMapTerm = $thisPlan.createHashMap();
        |  long $beforeAgg = System.nanoTime();
-       |  $doAggFuncName();
+       |  $doAggFuncName(partitionIndex);
        |  $aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS);
        |}
        |// output the result
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowCompressionUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowCompressionUtils.scala
new file mode 100644
index 0000000000000..19b84d1cd5d8f
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowCompressionUtils.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.arrow
+
+import org.apache.arrow.compression.{Lz4CompressionCodec, ZstdCompressionCodec}
+import org.apache.arrow.vector.compression.{CompressionCodec, NoCompressionCodec}
+
+import org.apache.spark.SparkException
+
+private[sql] object ArrowCompressionUtils {
+
+  /**
+   * Creates the write-side Arrow [[CompressionCodec]] for the codec selected by
+   * `spark.sql.execution.arrow.compression.codec`, honoring
+   * `spark.sql.execution.arrow.compression.zstd.level` for zstd.
+   *
+   * The codec instance must be constructed directly rather than through
+   * `CompressionCodec.Factory.INSTANCE.createCodec(codecType)`: the codec type enum does not
+   * carry a compression level, so that factory overload always builds a codec at the default
+   * level, silently dropping the configured one. The level only matters on the write side; the
+   * read side looks up the codec by the type recorded in the IPC message, so it is unaffected.
+   */
+  def createCompressionCodec(
+      codecName: String,
+      zstdCompressionLevel: Int): CompressionCodec = {
+    codecName match {
+      case "none" => NoCompressionCodec.INSTANCE
+      case "zstd" => new ZstdCompressionCodec(zstdCompressionLevel)
+      case "lz4" => new Lz4CompressionCodec()
+      case other =>
+        throw SparkException.internalError(
+          s"Unsupported Arrow compression codec: $other. Supported values: none, zstd, lz4")
+    }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala
index b1e8217ff257f..464cac157b25c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala
@@ -24,15 +24,12 @@ import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._
 import scala.util.control.NonFatal
 
-import org.apache.arrow.compression.{Lz4CompressionCodec, ZstdCompressionCodec}
 import org.apache.arrow.flatbuf.MessageHeader
 import org.apache.arrow.memory.BufferAllocator
 import org.apache.arrow.vector._
-import org.apache.arrow.vector.compression.{CompressionCodec, NoCompressionCodec}
 import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter, ReadChannel, WriteChannel}
 import org.apache.arrow.vector.ipc.message.{ArrowRecordBatch, IpcOption, MessageSerializer}
 
-import org.apache.spark.SparkException
 import org.apache.spark.TaskContext
 import org.apache.spark.internal.Logging
 import org.apache.spark.network.util.JavaUtils
@@ -99,22 +96,8 @@ private[sql] object ArrowConverters extends Logging {
     protected val root = VectorSchemaRoot.create(arrowSchema, allocator)
 
     // Create compression codec based on config
-    private val compressionCodecName = SQLConf.get.arrowCompressionCodec
-    private val codec = compressionCodecName match {
-      case "none" => NoCompressionCodec.INSTANCE
-      case "zstd" =>
-        val compressionLevel = SQLConf.get.arrowZstdCompressionLevel
-        val factory = CompressionCodec.Factory.INSTANCE
-        val codecType = new ZstdCompressionCodec(compressionLevel).getCodecType()
-        factory.createCodec(codecType)
-      case "lz4" =>
-        val factory = CompressionCodec.Factory.INSTANCE
-        val codecType = new Lz4CompressionCodec().getCodecType()
-        factory.createCodec(codecType)
-      case other =>
-        throw SparkException.internalError(
-          s"Unsupported Arrow compression codec: $other. Supported values: none, zstd, lz4")
-    }
+    private val codec = ArrowCompressionUtils.createCompressionCodec(
+      SQLConf.get.arrowCompressionCodec, SQLConf.get.arrowZstdCompressionLevel)
     protected val unloader = new VectorUnloader(root, true, codec, true)
     protected val arrowWriter = ArrowWriter.create(root)
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 842d9a1f9ce25..88c74ab7adc41 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -25,13 +25,15 @@ import scala.concurrent.ExecutionContext
 import scala.concurrent.duration.Duration
 
 import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, SparkException, TaskContext}
-import org.apache.spark.rdd.{EmptyRDD, PartitionwiseSampledRDD, RDD, SQLPartitioningAwareUnionRDD}
+import org.apache.spark.internal.LogKeys
+import org.apache.spark.rdd.{EmptyRDD, PartitionwiseSampledRDD, RDD, SQLPartitioningAwareUnionRDD, UnionPartition, UnionRDD}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.plans.physical._
-import org.apache.spark.sql.execution.metric.SQLMetrics
+import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec}
+import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types.{LongType, StructType}
 import org.apache.spark.sql.vectorized.ColumnarBatch
@@ -240,6 +242,22 @@ case class FilterExec(condition: Expression, child: SparkPlan)
   // The columns that will filtered out by `IsNotNull` could be considered as not nullable.
   private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId)
 
+  // `otherPreds` bound against this operator's `output`, shared between the CSE gate in
+  // `doConsume` and the CSE codegen itself. Codegen-only derived state, so `@transient`: it is
+  // computed on the driver during code generation and never accessed on executors.
+  @transient private lazy val boundOtherPreds: Seq[Expression] =
+    otherPreds.map(BindReferences.bindReference(_, output))
+
+  // CSE analysis of `boundOtherPreds`, built once and reused. `doConsume` consults it to decide
+  // whether any common subexpression is worth eliminating; when one is, the same analysis is
+  // handed to `subexpressionEliminationForWholeStageCodegen` rather than rebuilt. `@transient`
+  // because `EquivalentExpressions` is not serializable (and this is driver-only codegen state).
+  @transient private lazy val otherPredsEquivalentExpressions: EquivalentExpressions = {
+    val equivalentExpressions = new EquivalentExpressions
+    boundOtherPreds.foreach(equivalentExpressions.addExprTree(_))
+    equivalentExpressions
+  }
+
   // Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate
   // all the variables at the beginning to take advantage of short circuiting.
   override def usedInputs: AttributeSet = AttributeSet.empty
@@ -289,8 +307,21 @@ case class FilterExec(condition: Expression, child: SparkPlan)
     //       without consulting `isNull_X`. The (b) interleaving gives us that ordering
     //       for free, since the IsNotNull check fires before the CSE precompute keyed
     //       off the same reference.
+    // Only take the CSE path when there is actually a common subexpression to eliminate. That
+    // path emits the `inputVarsEvalCode` prologue below, which eagerly evaluates every
+    // `otherPreds` input column at the top of the row loop -- required so eliminated
+    // subexpressions can be materialized into shared variables, but it defeats the
+    // short-circuiting the non-CSE path gets from loading columns lazily, just before the
+    // predicate that needs them. With no common subexpression the prologue is pure overhead
+    // (e.g. decoding a decimal column for rows a cheaper earlier predicate would reject), so we
+    // fall back to `generatePredicateCode`.
+    //
+    // `subexpressionElimination.filterExec.enabled` additionally gates this path so it can be
+    // turned off independently of subexpression elimination elsewhere.
     val (prologueCode, predicateCode) =
-      if (conf.subexpressionEliminationEnabled && otherPreds.nonEmpty) {
+      if (conf.subexpressionEliminationEnabled && conf.subexpressionEliminationFilterExecEnabled &&
+          otherPreds.nonEmpty &&
+          otherPredsEquivalentExpressions.getCommonSubexpressions.nonEmpty) {
         // Pre-evaluate input variables before CSE analysis: CSE clears
         // ctx.currentVars[i].code as a side effect; without this pre-evaluation, Janino
         // fails when otherPreds reference the same input columns that CSE already
@@ -299,8 +330,8 @@ case class FilterExec(condition: Expression, child: SparkPlan)
         val inputVarsEvalCode = evaluateRequiredVariables(
           child.output, input, otherPredInputAttrs)
 
-        val boundOtherPreds = otherPreds.map(BindReferences.bindReference(_, output))
-        val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundOtherPreds)
+        val subExprs =
+          ctx.subexpressionEliminationForWholeStageCodegen(otherPredsEquivalentExpressions)
 
         // Group CSE states by the index of the first otherPred that references them.
         // `evaluateSubExprEliminationState` recursively emits each state's children
@@ -461,7 +492,8 @@ case class SampleExec(
     "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
 
   protected override def doExecute(): RDD[InternalRow] = {
-    if (withReplacement) {
+    val numOutputRows = longMetric("numOutputRows")
+    val sampled = if (withReplacement) {
       // Disable gap sampling since the gap sampling method buffers two rows internally,
       // requiring us to copy the row, which is more expensive than the random number generator.
       new PartitionwiseSampledRDD[InternalRow, InternalRow](
@@ -472,6 +504,12 @@ case class SampleExec(
     } else {
       child.execute().randomSampleWithRange(lowerBound, upperBound, resolvedSeed)
     }
+    sampled.mapPartitionsInternal { iter =>
+      iter.map { row =>
+        numOutputRows += 1
+        row
+      }
+    }
   }
 
   // Mark this as empty. This plan doesn't need to evaluate any inputs and can defer the evaluation
@@ -507,7 +545,7 @@ case class SampleExec(
               |   java.util.Random random = new java.util.Random(${resolvedSeed}L);
               |   long randomSeed = random.nextLong();
               |   int loopCount = 0;
-              |   while (loopCount < partitionIndex) {
+              |   while (loopCount < ${ctx.currentPartitionIndexVar}) {
               |     randomSeed = random.nextLong();
               |     loopCount += 1;
               |   }
@@ -530,7 +568,7 @@ case class SampleExec(
       val sampler = ctx.addMutableState(s"$samplerClass<UnsafeRow>", "sampler",
         v => s"""
           | $v = new $samplerClass<UnsafeRow>($lowerBound, $upperBound, false);
-          | $v.setSeed(${resolvedSeed}L + partitionIndex);
+          | $v.setSeed(${resolvedSeed}L + ${ctx.currentPartitionIndexVar});
          """.stripMargin.trim)
 
       s"""
@@ -627,9 +665,10 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
     // The default size of a batch, which must be positive integer
     val batchSize = 1000
 
-    val initRangeFuncName = ctx.addNewFunction("initRange",
+    val initRangeName = ctx.freshName("initRange")
+    val initRangeFuncName = ctx.addNewFunction(initRangeName,
       s"""
-        | private void initRange(int idx) {
+        | private void $initRangeName(int idx) {
         |   $BigInt index = $BigInt.valueOf(idx);
         |   $BigInt numSlice = $BigInt.valueOf(${numSlices}L);
         |   $BigInt numElement = $BigInt.valueOf(${numElements.toLong}L);
@@ -717,7 +756,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
       | // initialize Range
       | if (!$initTerm) {
       |   $initTerm = true;
-      |   $initRangeFuncName(partitionIndex);
+      |   $initRangeFuncName(${ctx.currentPartitionIndexVar});
       | }
       |
       | while ($loopCondition) {
@@ -820,7 +859,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
  * If we change how this is implemented physically, we'd need to update
  * [[org.apache.spark.sql.catalyst.plans.logical.Union.maxRowsPerPartition]].
  */
-case class UnionExec(children: Seq[SparkPlan]) extends SparkPlan {
+case class UnionExec(children: Seq[SparkPlan]) extends SparkPlan with CodegenSupport {
   // updating nullability to make all the children consistent
   override def output: Seq[Attribute] = {
     children.map(_.output).transpose.map { attrs =>
@@ -898,8 +937,215 @@ case class UnionExec(children: Seq[SparkPlan]) extends SparkPlan {
     }
   }
 
+  // True when the codegen path applies: `outputPartitioning` is `UnknownPartitioning`,
+  // and `unionedInputRDD` matches the semantics of `sparkContext.union(...)` in `doExecute`.
+  private def isPlainUnion: Boolean = outputPartitioning.isInstanceOf[UnknownPartitioning]
+
+  // Per-child projection from the child's output to the union's output. The wrapped
+  // child is always the source `Attribute` (deterministic by construction); the Alias
+  // only remaps the exprId/name/metadata. `WidenSetOperationTypes` aligns top-level
+  // dataTypes, but nested nullability differences bypass it; those cases are caught
+  // by the `type-mismatch` gate below, which is the single source of truth for the
+  // `src.dataType == tgt.dataType` invariant `doConsume` relies on.
+  @transient private lazy val perChildProjections: IndexedSeq[Seq[NamedExpression]] =
+    children.toIndexedSeq.map { child =>
+      child.output.zip(output).map { case (src, tgt) =>
+        Alias(src, tgt.name)(
+          exprId = tgt.exprId,
+          qualifier = tgt.qualifier,
+          explicitMetadata = Some(tgt.metadata))
+      }
+    }
+
+  // Memoized: consulted by `supportCodegen` (called multiple times by
+  // `CollapseCodegenStages`) and by `metrics`. Conf and children are stable
+  // for a given UnionExec instance; cross-plan staleness is impossible since
+  // UnionExec is a case class and `withNewChildren` produces a fresh instance.
+  @transient private lazy val supportCodegenFailureReason: Option[String] = {
+    if (!conf.getConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED)) {
+      Some("union-codegen-disabled")
+    } else if (!isPlainUnion) {
+      Some("partitioning-aware")
+    } else if (children.exists(_.exists(_.isInstanceOf[UnionExec]))) {
+      Some("nested-union")
+    } else if (children.exists(_.exists(UnionExec.isKnownMultiInputRDDCodegen))) {
+      Some("multi-rdd-child")
+    } else if (children.exists(UnionExec.hasPartitionIndexDependentCodegen)) {
+      Some("partition-index-dependent-child")
+    } else if (children.size > conf.getConf(SQLConf.WHOLESTAGE_UNION_MAX_CHILDREN)) {
+      Some("max-children-exceeded")
+    } else if (supportsColumnar) {
+      Some("columnar")
+    } else if (children.exists(c =>
+      c.output.zip(output).exists { case (src, tgt) => src.dataType != tgt.dataType })) {
+      Some("type-mismatch")
+    } else {
+      None
+    }
+  }
+
+  override def supportCodegen: Boolean = {
+    val reason = supportCodegenFailureReason
+    if (reason.isEmpty) true
+    else {
+      logDebug(log"UnionExec codegen skipped: " +
+        log"reason=${MDC(LogKeys.REASON, reason.get)}, " +
+        log"numChildren=${MDC(LogKeys.NUM_CHILDREN, children.size)}\n" +
+        log"${MDC(LogKeys.TREE_NODE, treeString)}")
+      false
+    }
+  }
+
+  // Registered only when fusion will actually run, so plans that fall back
+  // to `doExecute` (which never updates the metric) do not surface a
+  // 0-valued row count in the SQL UI. `doConsume` is the sole incrementer.
+  override lazy val metrics: Map[String, SQLMetric] =
+    if (supportCodegenFailureReason.isEmpty) {
+      Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
+    } else {
+      Map.empty
+    }
+
+  // Builds a plain `UnionRDD` directly (not `SparkContext.union`) to preserve
+  // a 1:1 partition-to-child mapping via `UnionPartition.parentRddIndex`.
+  // The `require` below is a backstop: any multi-RDD `CodegenSupport`
+  // operator missing from `isKnownMultiInputRDDCodegen` will trip here
+  // instead of falling back gracefully.
+  @transient private lazy val unionedInputRDD: RDD[InternalRow] = {
+    val childRDDs: Seq[RDD[InternalRow]] = children.map { c =>
+      val cs = c.asInstanceOf[CodegenSupport]
+      val rdds = cs.inputRDDs()
+      require(rdds.size == 1,
+        s"UnionExec.inputRDDs: child ${c.nodeName} returned ${rdds.size} RDDs")
+      rdds.head
+    }
+    new UnionRDD(sparkContext, childRDDs)
+  }
+
+  override def inputRDDs(): Seq[RDD[InternalRow]] = Seq(unionedInputRDD)
+
+  // Per-emission codegen state, set in `doProduce` and read in `doConsume`.
+  // `numOutputRowsTerm` is registered once per stage so the metric appears in
+  // `references[]` exactly once instead of once per child; `currentEmittingChild`
+  // tells `doConsume` which child's projection to bind.
+  //
+  // A single `UnionExec` instance can have its codegen driven by more than one
+  // thread at the same time: a reused exchange/subquery stage is generated
+  // concurrently with the main plan, and async subquery / dynamic-pruning
+  // execution can overlap a driver-side `doCodeGen`. A plain field would let a
+  // racing `doProduce` reset `currentEmittingChild` to -1 while another thread
+  // is still in `doConsume`. Each `doCodeGen` pass is itself single-threaded
+  // (`produce` -> `doConsume` run inline on one thread), so a `ThreadLocal`
+  // isolates the state per pass without that cross-thread race.
+  //
+  // This state is valid only for the duration of one `doCodeGen` pass, not for
+  // the lifetime of a thread (much like the per-pass fields on `CodegenContext`,
+  // e.g. `currentPartitionIndexVar`, which `doProduce` saves and restores just
+  // below). `ThreadLocal` is correct because per-pass and per-thread coincide
+  // here: a pass runs inline on one thread and passes never nest on a thread.
+  // We keep it in a `ThreadLocal` rather than routing it through `ctx` because
+  // `CodegenContext` has no general-purpose per-pass attribute map; threading it
+  // through `ctx` would mean adding `UnionExec`-specific fields to a class shared
+  // by every operator. The `ThreadLocal` keeps this state local to the node that
+  // needs it. Resetting `currentEmittingChild` to -1 at the end of `doProduce`
+  // also guards against a stale value being read by a later, unrelated pass
+  // that reuses the same pooled thread.
+  @transient private lazy val numOutputRowsTerm = new ThreadLocal[String]
+  @transient private lazy val currentEmittingChild: ThreadLocal[Int] =
+    ThreadLocal.withInitial(() => -1)
+
+  override protected def doProduce(ctx: CodegenContext): String = {
+    numOutputRowsTerm.set(metricTerm(ctx, "numOutputRows"))
+
+    // For each partition of the unioned RDD, record its owning child and its
+    // index within that child's RDD. Read both fields directly off the
+    // `UnionPartition` so the lookup arrays do not assume `UnionRDD` lays
+    // partitions out in child order.
+    val (partitionToChild, partitionToLocalIdx) = unionedInputRDD.partitions.map { p =>
+      val up = p.asInstanceOf[UnionPartition[_]]
+      (up.parentRddIndex, up.parentPartition.index)
+    }.unzip
+    val p2cRef = ctx.addReferenceObj("partitionToChild", partitionToChild)
+    val p2lRef = ctx.addReferenceObj("partitionToLocalIdx", partitionToLocalIdx)
+    val childIndexVar = ctx.freshName("unionChildIdx")
+
+    // Each child's produced code is wrapped in its own helper method.
+    // Without this, the fused method's bytecode grows linearly with the
+    // number of children and quickly exceeds HotSpot's per-method limit,
+    // forcing the whole stage to run interpreted.
+    //
+    // The helper takes `int partitionIndex` as a parameter; `addNewFunction`
+    // may spill helpers into a nested class once the outer class fills up,
+    // and a nested class cannot access the protected
+    // `BufferedRowIterator.partitionIndex` field.
+    //
+    // `currentPartitionIndexVar` is rebound to an array-deref expression
+    // (rather than a local) so leaf operators (`RangeExec`, `SampleExec`)
+    // see the child-local index regardless of where their code is emitted.
+    // `SampleExec.doConsume` uses `addMutableState`, whose initializer is
+    // emitted into the state-init function, not the helper - a local in
+    // the helper would not be in scope there. The expression resolves
+    // against `partitionIndex` (the helper parameter inside the helper,
+    // and the `BufferedRowIterator` field elsewhere) in every context.
+    val savedPartIdxVar = ctx.currentPartitionIndexVar
+    ctx.currentPartitionIndexVar = s"((int[]) $p2lRef)[partitionIndex]"
+    val cases = children.zipWithIndex.map { case (c, i) =>
+      currentEmittingChild.set(i)
+      val producedCode = c.asInstanceOf[CodegenSupport].produce(ctx, this)
+      val helper = ctx.freshName("unionChildProcess")
+      val qualifiedHelper = ctx.addNewFunction(helper,
+        s"""
+           |private void $helper(int partitionIndex) throws java.io.IOException {
+           |  $producedCode
+           |}
+         """.stripMargin)
+      s"""case $i: {
+         |  $qualifiedHelper(partitionIndex);
+         |  break;
+         |}""".stripMargin
+    }
+    currentEmittingChild.set(-1)
+    ctx.currentPartitionIndexVar = savedPartIdxVar
+
+    s"""
+       |int $childIndexVar = ((int[]) $p2cRef)[partitionIndex];
+       |switch ($childIndexVar) {
+       |  ${cases.mkString("\n")}
+       |  default:
+       |    throw new java.lang.IllegalStateException(
+       |      "UnionExec: Unexpected childIndex=" + $childIndexVar);
+       |}
+     """.stripMargin
+  }
+
+  override def doConsume(
+      ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = {
+    val i = currentEmittingChild.get
+    require(i >= 0, "UnionExec.doConsume invoked outside doProduce emission window")
+    // Route BoundReference reads through `currentVars` (the incoming row is
+    // delivered as variables under WSCG, not via ctx.INPUT_ROW).
+    val bound = BindReferences.bindReferences(perChildProjections(i), children(i).output)
+    ctx.currentVars = input
+    ctx.INPUT_ROW = null
+    val projectedExprCodes = bound.map(_.genCode(ctx))
+
+    s"""
+       |${numOutputRowsTerm.get}.add(1L);
+       |${consume(ctx, projectedExprCodes)}
+     """.stripMargin
+  }
+
+  // True if any child requires result copying; the default throws for
+  // multi-child operators and is unsuitable here.
+  override def needCopyResult: Boolean =
+    children.exists(_.asInstanceOf[CodegenSupport].needCopyResult)
+
+  // `doConsume` handles projection and emission; the parent's `consume` driver
+  // decides which output columns to materialize.
+  override def usedInputs: AttributeSet = AttributeSet.empty
+
   protected override def doExecute(): RDD[InternalRow] = {
-    if (outputPartitioning.isInstanceOf[UnknownPartitioning]) {
+    if (isPlainUnion) {
       sparkContext.union(children.map(_.execute()))
     } else {
       // This union has a known partitioning, i.e., its children have the same partitioning
@@ -922,6 +1168,39 @@ case class UnionExec(children: Seq[SparkPlan]) extends SparkPlan {
     copy(children = newChildren)
 }
 
+object UnionExec {
+  /**
+   * Codegen operators that return more than one RDD from `inputRDDs()`.
+   * `UnionExec`'s fusion assumes each direct child contributes one RDD.
+   */
+  def isKnownMultiInputRDDCodegen(p: SparkPlan): Boolean = p match {
+    case _: SortMergeJoinExec => true
+    case _: ShuffledHashJoinExec => true
+    case _ => false
+  }
+
+  /**
+   * True if any expression in the subtree embeds the raw `partitionIndex` field
+   * via `addPartitionInitializationStatement`, which would read the global
+   * UnionRDD index instead of the child-local one under fusion.
+   *
+   * The check uses [[Nondeterministic]] as the proxy: every catalyst expression
+   * that calls `addPartitionInitializationStatement` referencing `partitionIndex`
+   * is `Nondeterministic`. The `InputFile*` expressions are `Nondeterministic`
+   * but read from `InputFileBlockHolder` (a per-task thread-local) and do not
+   * embed `partitionIndex`, so they are safe under fusion.
+   */
+  def hasPartitionIndexDependentCodegen(p: SparkPlan): Boolean = p.exists { plan =>
+    plan.expressions.exists(_.exists {
+      case _: InputFileName => false
+      case _: InputFileBlockStart => false
+      case _: InputFileBlockLength => false
+      case _: Nondeterministic => true
+      case _ => false
+    })
+  }
+}
+
 /**
  * Physical plan for returning a new RDD that has exactly `numPartitions` partitions.
  * Similar to coalesce defined on an [[RDD]], this operation results in a narrow dependency, e.g.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
index 157f0071a3dc6..924e8b820a7c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.command
 
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.CatalogStatistics
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.classic.ClassicConversions.castToImpl
@@ -104,7 +104,7 @@ case class AnalyzeColumnCommand(
   private def analyzeColumnInCatalog(sparkSession: SparkSession): Unit = {
     val sessionState = sparkSession.sessionState
     val tableMeta = sessionState.catalog.getTableMetadata(tableIdent)
-    if (tableMeta.tableType == CatalogTableType.VIEW) {
+    if (tableMeta.isViewLike) {
       // Analyzes a catalog view if the view is cached
       val plan = sparkSession.table(tableIdent.quotedString).logicalPlan
       if (!analyzeColumnInCachedData(plan, sparkSession)) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala
index 8f1e05c87c8f3..39169f8d22df4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.command
 
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.classic.ClassicConversions.castToImpl
 import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -75,7 +75,7 @@ case class AnalyzePartitionCommand(
     val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
     val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
     val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
-    if (tableMeta.tableType == CatalogTableType.VIEW) {
+    if (tableMeta.isViewLike) {
       throw QueryCompilationErrors.analyzeTableNotSupportedOnViewsError()
     }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
index c7d32b7fece01..e2c1d80658d1f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
@@ -29,7 +29,7 @@ import org.apache.spark.internal.LogKeys.{COUNT, DATABASE_NAME, ERROR, TABLE_NAM
 import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
 import org.apache.spark.sql.catalyst.analysis.ResolvedIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable, CatalogTablePartition, CatalogTableType, ExternalCatalogUtils}
+import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable, CatalogTablePartition, ExternalCatalogUtils}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
@@ -241,7 +241,7 @@ object CommandUtils extends Logging {
     val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
     val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
     val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
-    if (tableMeta.tableType == CatalogTableType.VIEW) {
+    if (tableMeta.isViewLike) {
       // Analyzes a catalog view if the view is cached
       val table = sparkSession.table(tableIdent.quotedString)
       val cacheManager = sparkSession.sharedState.cacheManager
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
index 9bfdff127c5a5..a597087085b42 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.command.CreateUserDefinedFunctionCommand._
-import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.types.{DataType, MetadataBuilder, StructField, StructType}
 
 /**
  * The DDL command that creates a SQL function.
@@ -109,6 +109,18 @@ case class CreateSQLFunctionCommand(
         // Qualify the input parameters with the function name so that attributes referencing
         // the function input parameters can be resolved correctly.
         val qualifier = Seq(name.funcName)
+        // Mark scalar UDF parameter aliases as function input so name resolution can give a
+        // parameterless built-in function precedence over a same-named UDF parameter. Table UDF
+        // bodies reference parameters as outer references, where a parameterless function already
+        // wins via the pre-existing "function beats outer reference" precedence, so the marker is
+        // not applied (and would not be consumed) there.
+        val funcInputMetadata = if (isTableFunc) {
+          None
+        } else {
+          Some(new MetadataBuilder()
+            .putBoolean(SessionCatalog.SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY, true)
+            .build())
+        }
         val input = param.map(p => Alias(
           {
             val defaultExpr = p.getDefault()
@@ -131,7 +143,7 @@ case class CreateSQLFunctionCommand(
               }
               Cast(defaultPlan, p.dataType)
             }
-          }, p.name)(qualifier = qualifier))
+          }, p.name)(qualifier = qualifier, explicitMetadata = funcInputMetadata))
         Project(input, OneRowRelation())
       } else {
         OneRowRelation()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeFunctionCommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeFunctionCommandUtils.scala
deleted file mode 100644
index 24b04a9e3faf8..0000000000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeFunctionCommandUtils.scala
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.command
-
-import java.util
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.FunctionIdentifier
-import org.apache.spark.sql.catalyst.catalog.{SQLFunction, SqlPathFormat, UserDefinedFunction}
-import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
-
-/**
- * Helpers for [[DescribeFunctionCommand]] to retrieve and format
- * the frozen SQL PATH stored in SQL function metadata.
- */
-private[command] object DescribeFunctionCommandUtils {
-
-  /**
-   * Returns the frozen SQL PATH persisted for a SQL function, formatted
-   * for display. Persistent functions: loads [[CatalogFunction]] metadata
-   * from the catalog. Temporary SQL UDFs (not in catalog): falls back to
-   * parsing the usage JSON blob produced by [[SQLFunction.toExpressionInfo]].
-   */
-  private[command] def storedResolutionPathString(
-      sparkSession: SparkSession,
-      identifier: FunctionIdentifier,
-      info: ExpressionInfo): Option[String] = {
-    val rawJson = try {
-      val meta = sparkSession.sessionState.catalog
-        .getFunctionMetadata(identifier)
-      if (meta.isUserDefinedFunction) {
-        val udf = UserDefinedFunction.fromCatalogFunction(
-          meta,
-          sparkSession.sessionState.sqlParser)
-        udf.asInstanceOf[SQLFunction].functionStoredResolutionPath
-      } else {
-        None
-      }
-    } catch {
-      case _: org.apache.spark.sql.catalyst.analysis
-        .NoSuchFunctionException |
-          _: org.apache.spark.sql.catalyst.analysis
-            .NoSuchDatabaseException =>
-        extractResolutionPathFromSqlUdfUsage(info.getUsage)
-    }
-    rawJson.flatMap(formatStoredPath)
-  }
-
-  private def formatStoredPath(pathStr: String): Option[String] = {
-    SqlPathFormat.toDescribeJson(pathStr)
-      .flatMap(SqlPathFormat.formatForDisplay)
-  }
-
-  /**
-   * For temporary SQL UDFs not in the catalog, the resolution path may
-   * be embedded in the ExpressionInfo usage JSON blob. Returns None if
-   * the usage string is not JSON or does not contain the path key.
-   */
-  private def extractResolutionPathFromSqlUdfUsage(
-      usage: String): Option[String] = {
-    if (usage == null || usage.isEmpty) return None
-    try {
-      val map = UserDefinedFunction.mapper.readValue(
-        usage, classOf[util.HashMap[String, String]])
-      Option(map.get(SQLFunction.FUNCTION_RESOLUTION_PATH))
-        .filter(_.nonEmpty)
-    } catch {
-      case e: com.fasterxml.jackson.core.JsonProcessingException =>
-        throw new org.apache.spark.SparkException(
-          s"Corrupted SQL UDF metadata: expected JSON usage blob " +
-          s"but failed to parse: ${e.getMessage}", e)
-    }
-  }
-}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala
index ed248ccca67a7..64317a04547a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala
@@ -27,14 +27,14 @@ import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.analysis.{ResolvedPersistentView, ResolvedTable, ResolvedTempView}
-import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, SessionCatalog}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, DateTimeUtils, TimestampFormatter}
 import org.apache.spark.sql.classic.ClassicConversions.castToImpl
+import org.apache.spark.sql.connector.catalog.{V1Table, V1ViewInfo}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
-import org.apache.spark.sql.connector.catalog.V1Table
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.PartitioningUtils
@@ -71,10 +71,19 @@ case class DescribeRelationJsonCommand(
         if (partitionSpec.nonEmpty) {
           throw QueryCompilationErrors.descPartitionNotAllowedOnView(v.identifier.name())
         }
+        // Resolve `v.info` to a `CatalogTable` so the JSON renderer below can read v1-shaped
+        // fields uniformly. Session-catalog views carry the original `CatalogTable` inside
+        // `V1ViewInfo`; non-session v2 views carry a plain `ViewInfo` and are projected to a
+        // `CatalogTable` via `V1Table.toCatalogTable`, the same conversion the
+        // `CreateTableLike` strategy case in `DataSourceV2Strategy` uses.
+        val metadata = v.info match {
+          case v1Info: V1ViewInfo => v1Info.v1Table
+          case info => V1Table.toCatalogTable(v.catalog, v.identifier, info)
+        }
         describeIdentifier(v.identifier.toQualifiedNameParts(v.catalog), jsonMap)
-        describeColsJson(v.metadata.schema, jsonMap)
-        describeFormattedTableInfoJson(v.metadata, jsonMap)
-        describeViewSqlConfsJson(v.metadata, jsonMap)
+        describeColsJson(metadata.schema, jsonMap)
+        describeFormattedTableInfoJson(metadata, jsonMap)
+        describeViewSqlConfsJson(metadata, jsonMap)
 
       case ResolvedTable(catalog, identifier, V1Table(metadata), _) =>
         describeIdentifier(identifier.toQualifiedNameParts(catalog), jsonMap)
@@ -300,7 +309,7 @@ case class DescribeRelationJsonCommand(
       catalog: SessionCatalog,
       metadata: CatalogTable,
       jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = {
-    if (metadata.tableType == CatalogTableType.VIEW) {
+    if (metadata.isViewLike) {
       throw QueryCompilationErrors.descPartitionNotAllowedOnView(metadata.identifier.identifier)
     }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
index e248f0eea96de..4a9bebe75cff1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala
@@ -108,8 +108,8 @@ case class SetCommand(kv: Option[(String, Option[String])])
         }
         if (varName.nonEmpty && varName.length <= 3) {
           val variableResolution = new VariableResolution(
-            sparkSession.sessionState.analyzer.catalogManager.tempVariableManager
-          )
+            sparkSession.sessionState.analyzer.catalogManager.tempVariableManager,
+            sparkSession.sessionState.analyzer.catalogManager)
           val variable = variableResolution.lookupVariable(
             nameParts = varName
           )
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala
index 70538160eefdb..82ab46ec9b140 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetPathCommand.scala
@@ -17,40 +17,17 @@
 
 package org.apache.spark.sql.execution.command
 
-import java.util.Locale
-
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.connector.catalog.CatalogManager.{
-  CurrentSchemaEntry, LiteralPathEntry, SessionPathEntry
-}
-import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.connector.catalog.PathElement
 import org.apache.spark.sql.internal.SQLConf
 
-/**
- * Path element for SET PATH: either a well-known shortcut or a fully qualified schema reference.
- * SchemaInPath requires at least 2 parts (catalog.namespace); multi-level namespaces are allowed.
- */
-sealed trait PathElement
-
-object PathElement {
-  case object DefaultPath extends PathElement
-  case object SystemPath extends PathElement
-  case object PathRef extends PathElement
-  /**
-   * Current database/schema (SQL aliases). Stored as system.current_schema; expands when
-   * building resolution candidates so later USE SCHEMA is reflected.
-   */
-  case object CurrentDatabase extends PathElement
-  case object CurrentSchema extends PathElement
-  /** Fully qualified schema reference (catalog.namespace...). Must have at least 2 parts. */
-  case class SchemaInPath(parts: Seq[String]) extends PathElement
-}
-
 /**
  * Command for SET PATH = pathElement (, pathElement)*
  * Expands shortcuts at run time, validates no duplicates, and sets the internal session path.
+ *
+ * The [[PathElement]] AST and its expansion live in catalyst so that the same grammar can be
+ * reused to parse the [[SQLConf.DEFAULT_PATH]] conf value.
  */
 case class SetPathCommand(elements: Seq[PathElement]) extends LeafRunnableCommand {
 
@@ -64,23 +41,9 @@ case class SetPathCommand(elements: Seq[PathElement]) extends LeafRunnableComman
     }
     val conf = sparkSession.sessionState.conf
     val catalogManager = sparkSession.sessionState.catalogManager
-    val currentCatalog = catalogManager.currentCatalog.name
-    val currentNamespace = catalogManager.currentNamespace.toSeq
-    val caseSensitive = conf.caseSensitiveAnalysis
 
-    val expanded = expandPathElements(elements, conf, catalogManager)
-    val seen = new scala.collection.mutable.HashSet[Seq[String]]
-    expanded.foreach { entry =>
-      val concrete = entry.resolve(currentCatalog, currentNamespace)
-      def normalize(s: String): String = if (caseSensitive) s else s.toLowerCase(Locale.ROOT)
-      val key = concrete.map(normalize)
-      if (!seen.add(key)) {
-        throw new AnalysisException(
-          errorClass = "DUPLICATE_SQL_PATH_ENTRY",
-          messageParameters = Map("pathEntry" ->
-            concrete.map(p => if (p.contains(".")) s"`$p`" else p).mkString(".")))
-      }
-    }
+    val expanded0 = PathElement.expand(elements, conf, catalogManager)
+    val expanded = PathElement.validateNoStaticDuplicates(expanded0, conf.caseSensitiveAnalysis)
 
     if (expanded.isEmpty) {
       catalogManager.clearSessionPath()
@@ -89,36 +52,4 @@ case class SetPathCommand(elements: Seq[PathElement]) extends LeafRunnableComman
     }
     Seq.empty
   }
-
-  private def expandPathElements(
-      elements: Seq[PathElement],
-      conf: SQLConf,
-      catalogManager: CatalogManager): Seq[SessionPathEntry] = {
-    val currentSchemaSentinel = Seq("__current_schema__")
-
-    def toEntries(parts: Seq[Seq[String]]): Seq[SessionPathEntry] = parts.map {
-      case p if p == currentSchemaSentinel => CurrentSchemaEntry
-      case p => LiteralPathEntry(p)
-    }
-
-    def defaultWithCurrentSchema: Seq[SessionPathEntry] =
-      toEntries(conf.defaultPathOrder(Seq(currentSchemaSentinel)))
-
-    elements.flatMap {
-      case PathElement.DefaultPath =>
-        defaultWithCurrentSchema
-      case PathElement.SystemPath =>
-        toEntries(conf.systemPathOrder)
-      case PathElement.CurrentDatabase | PathElement.CurrentSchema =>
-        Seq(CurrentSchemaEntry)
-      case PathElement.PathRef =>
-        catalogManager.sessionPathEntries.getOrElse(defaultWithCurrentSchema)
-      case PathElement.SchemaInPath(parts) =>
-        if (parts.length < 2) {
-          throw QueryCompilationErrors.invalidSqlPathSchemaReferenceError(parts.mkString("."))
-        }
-        Seq(LiteralPathEntry(parts))
-    }
-  }
-
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
index 0415a33e2d6dd..7519216f1b367 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -50,7 +50,7 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
   extends LeafRunnableCommand {
 
   override def run(sparkSession: SparkSession): Seq[Row] = {
-    assert(table.tableType != CatalogTableType.VIEW)
+    assert(!table.isViewLike)
     assert(table.provider.isDefined)
 
     val sessionState = sparkSession.sessionState
@@ -151,7 +151,7 @@ case class CreateDataSourceTableAsSelectCommand(
   override def innerChildren: Seq[LogicalPlan] = query :: Nil
 
   override def run(sparkSession: SparkSession): Seq[Row] = {
-    assert(table.tableType != CatalogTableType.VIEW)
+    assert(!table.isViewLike)
     assert(table.provider.isDefined)
 
     val sessionState = sparkSession.sessionState
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 30f42ac96a284..354b22d572c18 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -234,7 +234,8 @@ case class DropTableCommand(
       // If the command DROP VIEW is to drop a table or DROP TABLE is to drop a view
       // issue an exception.
       table.tableType match {
-        case CatalogTableType.VIEW if !isView =>
+        // Both VIEW and METRIC_VIEW are conceptually views and must be dropped via DROP VIEW.
+        case t if CatalogTable.isViewLike(t) && !isView =>
           throw QueryCompilationErrors.wrongCommandForObjectTypeError(
             operation = "DROP TABLE",
             requiredType = s"${CatalogTableType.EXTERNAL.name} or ${CatalogTableType.MANAGED.name}",
@@ -242,10 +243,11 @@ case class DropTableCommand(
             foundType = catalog.getTableMetadata(tableName).tableType.name,
             alternative = "DROP VIEW"
           )
-        case o if o != CatalogTableType.VIEW && isView =>
+        case o if !CatalogTable.isViewLike(o) && isView =>
           throw QueryCompilationErrors.wrongCommandForObjectTypeError(
             operation = "DROP VIEW",
-            requiredType = CatalogTableType.VIEW.name,
+            requiredType =
+              s"${CatalogTableType.VIEW.name} or ${CatalogTableType.METRIC_VIEW.name}",
             objectName = catalog.getTableMetadata(tableName).qualifiedName,
             foundType = o.name,
             alternative = "DROP TABLE"
@@ -1092,11 +1094,11 @@ object DDLUtils extends Logging {
       isView: Boolean): Unit = {
     if (!catalog.isTempView(tableMetadata.identifier)) {
       tableMetadata.tableType match {
-        case CatalogTableType.VIEW if !isView =>
+        case t if CatalogTable.isViewLike(t) && !isView =>
           throw QueryCompilationErrors.cannotAlterViewWithAlterTableError(
             viewName = tableMetadata.identifier.table
           )
-        case o if o != CatalogTableType.VIEW && isView =>
+        case o if !CatalogTable.isViewLike(o) && isView =>
           throw QueryCompilationErrors.cannotAlterTableWithAlterViewError(
             tableName = tableMetadata.identifier.table
           )
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
index 5929e5c56f909..79db97744496e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
@@ -17,16 +17,20 @@
 
 package org.apache.spark.sql.execution.command
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
-import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource, SQLFunction}
+import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource, SQLFunction, SqlPathFormat}
 import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionInfo}
+import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
 import org.apache.spark.sql.catalyst.util.StringUtils
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
-import org.apache.spark.sql.types.{StringType, StructField, StructType}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{NullType, StringType, StructField, StructType}
 
 
 /**
@@ -101,6 +105,97 @@ case class DescribeFunctionCommand(
     toAttributes(schema)
   }
 
+  private def append(buffer: ArrayBuffer[(String, String)], key: String, value: String): Unit = {
+    buffer += (key -> value)
+  }
+
+  /**
+   * Pad all input strings to the same length using the max string length among all inputs.
+   */
+  private def tabulate(inputs: Seq[String]): Seq[String] = {
+    val maxLen = inputs.map(_.length).max
+    inputs.map { input => input.padTo(maxLen, " ").mkString }
+  }
+
+  private def formatParameters(params: StructType): Seq[String] = {
+    val names = tabulate(params.map(_.name))
+    val dataTypes = tabulate(params.map(_.dataType.sql))
+    // Only show parameter comments in extended mode.
+    val comments = params.map { p =>
+      if (isExtended) p.getComment().map(c => s" '$c'").getOrElse("") else ""
+    }
+    val defaults = params.map { p =>
+      if (isExtended) p.getDefault().map(d => s" DEFAULT $d").getOrElse("") else ""
+    }
+    names zip dataTypes zip defaults zip comments map {
+      case (((name, dataType), default), comment) => s"$name $dataType$default$comment"
+    }
+  }
+
+  private def describeSQLFunction(
+      info: ExpressionInfo,
+      qualifiedName: FunctionIdentifier,
+      parser: ParserInterface): Seq[Row] = {
+    val buffer = new ArrayBuffer[(String, String)]
+    val f = SQLFunction.fromExpressionInfo(info, parser)
+    // Match the legacy DESCRIBE FUNCTION path's qualification depth so
+    // `Function:` always renders the catalog-qualified 3-part name (when
+    // applicable), regardless of whether the function is a SQL UDF.
+    append(buffer, "Function:", qualifiedName.unquotedString)
+    append(buffer, "Type:", if (f.isTableFunc) SQLFunction.TABLE else SQLFunction.SCALAR)
+    // Function input
+    val input = f.inputParam
+    if (input.nonEmpty) {
+      val params = formatParameters(input.get)
+      assert(params.nonEmpty)
+      append(buffer, "Input:", params.head)
+      params.tail.foreach(s => append(buffer, "", s))
+    } else {
+      append(buffer, "Input:", "()")
+    }
+    // Function returns
+    if (f.isTableFunc) {
+      val returnParams = formatParameters(f.getTableFuncReturnCols)
+      assert(returnParams.nonEmpty)
+      append(buffer, "Returns:", returnParams.head)
+      returnParams.tail.foreach(s => append(buffer, "", s))
+    } else {
+      f.getScalarFuncReturnType match {
+        case _: NullType =>
+        case other => append(buffer, "Returns:", other.sql)
+      }
+    }
+    if (isExtended) {
+      f.comment.foreach(c => append(buffer, "Comment:", c))
+      f.collation.foreach(c => append(buffer, "Collation:", c))
+      f.deterministic.foreach(d => append(buffer, "Deterministic:", d.toString))
+      f.containsSQL.foreach { c =>
+        val dataAccess = if (c) "CONTAINS SQL" else "READS SQL DATA"
+        append(buffer, "Data Access:", dataAccess)
+      }
+      val configs = f.getSQLConfigs
+      if (configs.nonEmpty) {
+        val sorted = configs.toSeq.sortBy(_._1).map { case (key, value) => s"$key=$value" }
+        append(buffer, "Configs:", sorted.head)
+        sorted.tail.foreach(s => append(buffer, "", s))
+      }
+      f.owner.foreach(o => append(buffer, "Owner:", o))
+      append(buffer, "Create Time:", new java.util.Date(f.createTimeMs).toString)
+      // Put the function body at the end of the description.
+      append(buffer, "Body:", f.exprText.orElse(f.queryText).get)
+      // Show the frozen SQL PATH if one was persisted at function creation time.
+      if (SQLConf.get.pathEnabled) {
+        f.functionStoredResolutionPath
+          .flatMap(SqlPathFormat.toDescribeJson)
+          .flatMap(SqlPathFormat.formatForDisplay)
+          .foreach(p => append(buffer, "SQL Path:", p))
+      }
+    }
+    val keys = tabulate(buffer.map(_._1).toSeq)
+    val values = buffer.map(_._2)
+    keys.zip(values).map { case (key, value) => Row(s"$key $value") }
+  }
+
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val identifier = if (info.getDb != null) {
       sparkSession.sessionState.catalog.qualifyIdentifier(
@@ -108,31 +203,23 @@ case class DescribeFunctionCommand(
     } else {
       FunctionIdentifier(info.getName)
     }
-    val name = identifier.unquotedString
-    val result = if (info.getClassName != null) {
-      Row(s"Function: $name") ::
-        Row(s"Class: ${info.getClassName}") ::
-        Row(s"Usage: ${info.getUsage}") :: Nil
+    if (SQLFunction.isSQLFunction(info.getClassName)) {
+      describeSQLFunction(info, identifier, sparkSession.sessionState.sqlParser)
     } else {
-      Row(s"Function: $name") :: Row(s"Usage: ${info.getUsage}") :: Nil
-    }
-
-    val sqlPathRows =
-      if (isExtended &&
-        sparkSession.sessionState.conf.pathEnabled &&
-        SQLFunction.isSQLFunction(info.getClassName)) {
-        DescribeFunctionCommandUtils
-          .storedResolutionPathString(sparkSession, identifier, info)
-          .map(s => Seq(Row(s"SQL Path: $s")))
-          .getOrElse(Nil)
+      val name = identifier.unquotedString
+      val result = if (info.getClassName != null) {
+        Row(s"Function: $name") ::
+          Row(s"Class: ${info.getClassName}") ::
+          Row(s"Usage: ${info.getUsage}") :: Nil
       } else {
-        Nil
+        Row(s"Function: $name") :: Row(s"Usage: ${info.getUsage}") :: Nil
       }
 
-    if (isExtended) {
-      (result ++ sqlPathRows) :+ Row(s"Extended Usage:${info.getExtended}")
-    } else {
-      result
+      if (isExtended) {
+        result :+ Row(s"Extended Usage:${info.getExtended}")
+      } else {
+        result
+      }
     }
   }
 }
@@ -167,9 +254,13 @@ case class DropFunctionCommand(
         identifier.funcName
       }
 
-      // Check if temp function exists first - if it does, allow dropping it even if a builtin
-      // with the same name exists (shadowing case)
-      if (!catalog.isTemporaryFunction(FunctionIdentifier(funcName)) &&
+      // Keep DROP TEMPORARY FUNCTION semantics consistent for unqualified names:
+      // - builtin name, no temp present, no IF EXISTS => FORBIDDEN_OPERATION
+      // - IF EXISTS => no-op
+      // Qualified temp namespaces (session / system.session) always target temp functions.
+      if (identifier.database.isEmpty &&
+          !ifExists &&
+          !catalog.isTemporaryFunction(FunctionIdentifier(funcName)) &&
           catalog.isBuiltinFunction(funcName)) {
         throw QueryCompilationErrors.cannotDropBuiltinFuncError(funcName)
       }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala
index 8c21a908ddf32..3ae6dcad174fd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala
@@ -21,9 +21,15 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.{QueryPlanningTracker, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaUnsupported}
-import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HiveTableRelation, SessionCatalog}
+import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, View}
+import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
+import org.apache.spark.sql.connector.catalog.CatalogV2Util
 import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+import org.apache.spark.sql.metricview.serde.MetricView
 import org.apache.spark.sql.metricview.util.MetricViewPlanner
 import org.apache.spark.sql.types.StructType
 
@@ -39,32 +45,47 @@ case class CreateMetricViewCommand(
   import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 
   override def run(sparkSession: SparkSession): Seq[Row] = {
-    val catalog = sparkSession.sessionState.catalog
-    val name = child match {
-      case v: ResolvedIdentifier =>
-        v.identifier.asTableIdentifier
+    child match {
+      case v: ResolvedIdentifier if CatalogV2Util.isSessionCatalog(v.catalog) =>
+        createMetricViewInSessionCatalog(sparkSession, v)
       case _ => throw SparkException.internalError(
         s"Failed to resolve identifier for creating metric view")
     }
-    val analyzed = MetricViewHelper.analyzeMetricViewText(sparkSession, name, originalText)
+  }
 
+  private def validateUserColumns(name: TableIdentifier, analyzed: LogicalPlan): Unit = {
     if (userSpecifiedColumns.nonEmpty) {
       if (userSpecifiedColumns.length > analyzed.output.length) {
         throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError(
-          name, userSpecifiedColumns.map(_._1), analyzed)
+          name.nameParts, userSpecifiedColumns.map(_._1), analyzed)
       } else if (userSpecifiedColumns.length < analyzed.output.length) {
         throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError(
-          name, userSpecifiedColumns.map(_._1), analyzed)
+          name.nameParts, userSpecifiedColumns.map(_._1), analyzed)
       }
     }
+  }
+
+  private def createMetricViewInSessionCatalog(
+      sparkSession: SparkSession,
+      resolved: ResolvedIdentifier): Seq[Row] = {
+    val catalog = sparkSession.sessionState.catalog
+    val name = resolved.identifier.asTableIdentifier
+    val (analyzed, metricView) = MetricViewHelper.analyzeMetricViewText(
+      sparkSession, name.nameParts, originalText)
+    validateUserColumns(name, analyzed)
+    // Merge the descriptor `metric_view.*` properties (`from.type`, `from.name`/`from.sql`,
+    // `where`) into the user-supplied properties so v1 DESCRIBE TABLE EXTENDED surfaces the
+    // same descriptor rows as the v2 path in `DataSourceV2Strategy`.
+    val mergedProps = properties ++ metricView.getProperties
     catalog.createTable(
       ViewHelper.prepareTable(
         sparkSession, name, Some(originalText), analyzed, userSpecifiedColumns,
-        properties, SchemaUnsupported, comment,
+        mergedProps, SchemaUnsupported, comment,
         None, isMetricView = true),
       ignoreIfExists = allowExisting)
     Seq.empty
   }
+
   override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
     copy(child = newChild)
   }
@@ -73,24 +94,108 @@ case class CreateMetricViewCommand(
 case class AlterMetricViewCommand(child: LogicalPlan, originalText: String)
 
 object MetricViewHelper {
+
+  /**
+   * Walks the analyzed plan to collect direct table/view dependencies. Each dependency is
+   * returned as a structural multi-part name (`Seq[String]`); v1 sources (resolved through
+   * the session catalog) are normalized to a stable 3-part shape
+   * `[spark_catalog, db, table]` -- `TableIdentifier.nameParts` returns 1, 2, or 3 parts
+   * depending on whether the analyzer captured the catalog / database, so without
+   * normalization the same source can produce a different shape across runs. v2 sources
+   * already arrive fully qualified (catalog + namespace + table) and are returned as-is so
+   * multi-level namespaces survive.
+   *
+   * Stops recursion at relation leaf nodes and persistent `View` nodes so only direct
+   * (not transitive) dependencies are recorded.
+   */
+  private[execution] def collectTableDependencies(plan: LogicalPlan): Seq[Seq[String]] = {
+    val tables = scala.collection.mutable.ArrayBuffer.empty[Seq[String]]
+    def traverse(p: LogicalPlan): Unit = p match {
+      case v: View if !v.isTempView =>
+        tables += qualifyV1(v.desc.identifier.nameParts)
+      case r: DataSourceV2Relation if r.catalog.isDefined && r.identifier.isDefined =>
+        val ident = r.identifier.get
+        // V2 catalogs may have multi-level namespaces; preserve the full arity rather than
+        // dot-joining the namespace into a single component.
+        tables += (r.catalog.get.name() +: ident.namespace().toIndexedSeq) :+ ident.name()
+      case r: HiveTableRelation =>
+        tables += qualifyV1(r.tableMeta.identifier.nameParts)
+      case r: LogicalRelation if r.catalogTable.isDefined =>
+        tables += qualifyV1(r.catalogTable.get.identifier.nameParts)
+      case other =>
+        other.children.foreach(traverse)
+        other.expressions.foreach(_.foreach {
+          case s: SubqueryExpression => traverse(s.plan)
+          case _ =>
+        })
+    }
+    traverse(plan)
+    tables.distinct.toSeq
+  }
+
+  /**
+   * Normalizes v1 source identifiers to a stable 3-part `[spark_catalog, db, table]` shape.
+   * `TableIdentifier.nameParts` may return 1, 2, or 3 parts depending on whether the analyzer
+   * captured the catalog / database components, which would otherwise leak through to
+   * dependency consumers as nondeterministic arity.
+   */
+  private def qualifyV1(parts: Seq[String]): Seq[String] = parts match {
+    case Seq(t) => Seq(SESSION_CATALOG_NAME, SessionCatalog.DEFAULT_DATABASE, t)
+    case Seq(db, t) => Seq(SESSION_CATALOG_NAME, db, t)
+    case Seq(_, _, _) => parts
+    case other => other  // Unexpected arity; pass through unchanged.
+  }
+
+  /**
+   * Analyzes a metric-view YAML body so the create / alter path can capture the source plan
+   * and its dependencies. Returns the analyzed plan together with the parsed [[MetricView]]
+   * descriptor (returned alongside the placeholder by [[MetricViewPlanner.planWrite]] so
+   * callers needing the descriptor for property emission don't have to re-parse the YAML).
+   *
+   * `nameParts` is the multi-part target identifier (catalog + namespace + table). The synthetic
+   * [[CatalogTable]] used as analysis context still carries a [[TableIdentifier]] (capped at
+   * 3 parts: catalog + database + table); for multi-level v2 namespaces we collapse the
+   * intermediate namespace components into the synthetic `database` slot. The synthetic identifier
+   * is not used to resolve the view body itself, so this collapse is observationally invisible to
+   * the analyzed plan; `verifyTemporaryObjectsNotExists` continues to receive the full
+   * `nameParts` so error messages still render the multi-part form.
+   */
   def analyzeMetricViewText(
       session: SparkSession,
-      name: TableIdentifier,
-      viewText: String): LogicalPlan = {
+      nameParts: Seq[String],
+      viewText: String): (LogicalPlan, MetricView) = {
     val analyzer = session.sessionState.analyzer
+    val syntheticIdent = nameParts match {
+      case Seq(table) =>
+        TableIdentifier(table)
+      case Seq(db, table) =>
+        TableIdentifier(table, Some(db))
+      case parts =>
+        // 3+ parts: catalog is the head, table is the last, the middle (1..n-1) collapses
+        // into the synthetic `database` slot. We dot-join the intermediate components so a
+        // human inspecting the synthetic identifier can still see them.
+        TableIdentifier(
+          parts.last,
+          Some(parts.slice(1, parts.length - 1).mkString(".")),
+          Some(parts.head))
+    }
     // this metadata is used for analysis check, it'll be replaced during create/update with
     // more accurate information
     val tableMeta = CatalogTable(
-      identifier = name,
+      identifier = syntheticIdent,
       tableType = CatalogTableType.VIEW,
       storage = CatalogStorageFormat.empty,
       schema = new StructType(),
       viewOriginalText = Some(viewText),
       viewText = Some(viewText))
-    val metricViewNode = MetricViewPlanner.planWrite(
+    // `planWrite` returns the placeholder (carrying pre-parsed `inputColumns`) and the
+    // parsed YAML descriptor separately, so the caller can read the descriptor for
+    // property emission (e.g. `metric_view.*` keys) without keeping it on the placeholder.
+    val (placeholder, metricView) = MetricViewPlanner.planWrite(
       tableMeta, viewText, session.sessionState.sqlParser)
-    val analyzed = analyzer.executeAndCheck(metricViewNode, new QueryPlanningTracker)
-    ViewHelper.verifyTemporaryObjectsNotExists(isTemporary = false, name, analyzed, Seq.empty)
-    analyzed
+    val analyzed = analyzer.executeAndCheck(placeholder, new QueryPlanningTracker)
+    ViewHelper.verifyTemporaryObjectsNotExists(
+      isTemporary = false, nameParts, analyzed, Seq.empty)
+    (analyzed, metricView)
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index c702ee3110fc0..9a1c9bfe1f59c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded, CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils, ResolveDefaultColumns}
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY
 import org.apache.spark.sql.classic.ClassicConversions.castToImpl
-import org.apache.spark.sql.connector.catalog.{TableCatalog, V1Table}
+import org.apache.spark.sql.connector.catalog.{TableCatalog, V1Table, V1ViewInfo}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.execution.CommandExecutionMode
@@ -104,7 +104,7 @@ case class CreateTableLikeCommand(
       provider
     } else if (fileFormat.inputFormat.isDefined) {
       Some(DDLUtils.HIVE_PROVIDER)
-    } else if (sourceTableDesc.tableType == CatalogTableType.VIEW) {
+    } else if (sourceTableDesc.isViewLike) {
       Some(sparkSession.sessionState.conf.defaultDataSourceName)
     } else {
       sourceTableDesc.provider
@@ -268,7 +268,7 @@ case class AlterTableAddColumnsCommand(
       table: TableIdentifier): CatalogTable = {
     val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table)
 
-    if (catalogTable.tableType == CatalogTableType.VIEW) {
+    if (catalogTable.isViewLike) {
       throw QueryCompilationErrors.alterAddColNotSupportViewError(table)
     }
 
@@ -591,7 +591,9 @@ object ResolvedChildHelper {
     val catalog = sparkSession.sessionState.catalog
     child match {
       case ResolvedTempView(_, metadata) => metadata
-      case ResolvedPersistentView(_, _, metadata) => metadata
+      // v1 inspection commands always see a v1 (`V1ViewInfo`) view here -- the v2 strategy
+      // handles non-session views before this method is reached.
+      case ResolvedPersistentView(_, _, info: V1ViewInfo) => info.v1Table
       case ResolvedTable(_, _, t: V1Table, _) => t.v1Table
       case _ if (catalog.isTempView(table)) =>
           catalog.getTempViewOrPermanentTableMetadata(table)
@@ -729,7 +731,7 @@ case class DescribeTableCommand(
       catalog: SessionCatalog,
       metadata: CatalogTable,
       result: ArrayBuffer[Row]): Unit = {
-    if (metadata.tableType == CatalogTableType.VIEW) {
+    if (metadata.isViewLike) {
       throw QueryCompilationErrors.descPartitionNotAllowedOnView(table.identifier)
     }
     DDLUtils.verifyPartitionProviderIsHive(spark, metadata, "DESC PARTITION")
@@ -1209,6 +1211,14 @@ case class ShowCreateTableCommand(
     } else {
       val tableMetadata = catalog.getTableRawMetadata(table)
 
+      // SHOW CREATE TABLE / VIEW does not have a WITH METRICS round-trippable form yet,
+      // so explicitly reject metric views rather than emit a misleading `CREATE VIEW`
+      // statement that loses the METRIC_VIEW kind. Tracked as follow-up.
+      if (tableMetadata.tableType == METRIC_VIEW) {
+        throw QueryCompilationErrors.showCreateTableNotSupportedOnMetricViewError(
+          table.identifier)
+      }
+
       // TODO: [SPARK-28692] unify this after we unify the
       //  CREATE TABLE syntax for hive serde and data source table.
       val metadata = if (DDLUtils.isDatasourceTable(tableMetadata)) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala
index ad867e6537671..dc28d4a7e7f14 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/FetchCursorExec.scala
@@ -194,7 +194,8 @@ case class FetchCursorExec(
       case FakeLocalCatalog => scriptingVariableManager.get
 
       case FakeSystemCatalog if tempVariableManager.get(namePartsCaseAdjusted).isEmpty =>
-        throw unresolvedVariableError(namePartsCaseAdjusted, Seq("SYSTEM", "SESSION"))
+        throw unresolvedVariableError(
+          namePartsCaseAdjusted, Seq(Seq("SYSTEM", "SESSION")), varRef.origin)
 
       case FakeSystemCatalog => tempVariableManager
 
@@ -207,7 +208,7 @@ case class FetchCursorExec(
       Literal(value, varRef.dataType)
     )
 
-    variableManager.set(namePartsCaseAdjusted, varDef)
+    variableManager.set(namePartsCaseAdjusted, varDef, varRef.origin)
   }
 
   /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala
index ef8e238832b35..9861bd77616ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/SetVariableExec.scala
@@ -80,7 +80,8 @@ case class SetVariableExec(variables: Seq[VariableReference], query: SparkPlan)
       case FakeLocalCatalog => scriptingVariableManager.get
 
       case FakeSystemCatalog if tempVariableManager.get(namePartsCaseAdjusted).isEmpty =>
-        throw unresolvedVariableError(namePartsCaseAdjusted, Seq("SYSTEM", "SESSION"))
+        throw unresolvedVariableError(
+          namePartsCaseAdjusted, Seq(Seq("SYSTEM", "SESSION")), variable.origin)
 
       case FakeSystemCatalog => tempVariableManager
 
@@ -90,7 +91,7 @@ case class SetVariableExec(variables: Seq[VariableReference], query: SparkPlan)
     val varDef = VariableDefinition(
       variable.identifier, variable.varDef.defaultValueSQL, Literal(value, variable.dataType))
 
-    variableManager.set(namePartsCaseAdjusted, varDef)
+    variableManager.set(namePartsCaseAdjusted, varDef, variable.origin)
   }
 
   override def output: Seq[Attribute] = Seq.empty
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala
index 3a4d55169d900..d99ddae538a67 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/VariableAssignmentUtils.scala
@@ -68,7 +68,8 @@ object VariableAssignmentUtils {
       case FakeLocalCatalog => scriptingVariableManager.get
 
       case FakeSystemCatalog if tempVariableManager.get(namePartsCaseAdjusted).isEmpty =>
-        throw unresolvedVariableError(namePartsCaseAdjusted, Seq("SYSTEM", "SESSION"))
+        throw unresolvedVariableError(
+          namePartsCaseAdjusted, Seq(Seq("SYSTEM", "SESSION")), varRef.origin)
 
       case FakeSystemCatalog => tempVariableManager
 
@@ -81,6 +82,6 @@ object VariableAssignmentUtils {
       Literal(value, varRef.dataType)
     )
 
-    variableManager.set(namePartsCaseAdjusted, varDef)
+    variableManager.set(namePartsCaseAdjusted, varDef, varRef.origin)
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 895c39dd83976..411682f35f6df 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -26,15 +26,15 @@ import org.apache.spark.SparkException
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.{CapturesConfig, SQLConfHelper, TableIdentifier}
-import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, ResolvedIdentifier, ResolvedPersistentView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType}
 import org.apache.spark.sql.catalyst.analysis.V2TableReference
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, TemporaryViewRelation}
 import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpression, VariableReference}
-import org.apache.spark.sql.catalyst.plans.logical.{AnalysisOnlyCommand, CreateTempView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE}
+import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE}
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.classic.ClassicConversions.castToImpl
-import org.apache.spark.sql.connector.catalog.CatalogManager
-import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
+import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, Identifier, ViewCatalog}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
 import org.apache.spark.sql.internal.StaticSQLConf
@@ -47,6 +47,12 @@ import org.apache.spark.util.ArrayImplicits._
  * properties(e.g. view default database, view query output column names) and store them as
  * properties in metastore, if we need to create a permanent view.
  *
+ * Note: this is the v1 (session catalog) path. Permanent-view checks (no temp-object refs,
+ * no auto-generated aliases, no cycles) run at exec time here because Dataset-built commands
+ * can be constructed with `isAnalyzed=true` and bypass the analyzer's recapture path. The v2
+ * equivalent is [[org.apache.spark.sql.catalyst.plans.logical.CreateView]]; its checks run at
+ * analysis time via [[CheckViewReferences]]. Mirror any new validation in both places.
+ *
  * @param name the name of this view.
  * @param userSpecifiedColumns the output column names and optional comments specified by users,
  *                             can be Nil if not specified.
@@ -113,10 +119,10 @@ case class CreateViewCommand(
     if (userSpecifiedColumns.nonEmpty) {
       if (userSpecifiedColumns.length > analyzedPlan.output.length) {
         throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError(
-          name, userSpecifiedColumns.map(_._1), analyzedPlan)
+          name.nameParts, userSpecifiedColumns.map(_._1), analyzedPlan)
       } else if (userSpecifiedColumns.length < analyzedPlan.output.length) {
         throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError(
-          name, userSpecifiedColumns.map(_._1), analyzedPlan)
+          name.nameParts, userSpecifiedColumns.map(_._1), analyzedPlan)
       }
       if (viewSchemaMode == SchemaEvolution) {
         throw SparkException.internalError(
@@ -128,8 +134,9 @@ case class CreateViewCommand(
 
     // When creating a permanent view, not allowed to reference temporary objects.
     // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved)
-    verifyTemporaryObjectsNotExists(isTemporary, name, analyzedPlan, referredTempFunctions)
-    verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary, name)
+    verifyTemporaryObjectsNotExists(
+      isTemporary, name.nameParts, analyzedPlan, referredTempFunctions)
+    verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary, name.nameParts)
 
     SchemaUtils.checkIndeterminateCollationInSchema(plan.schema)
 
@@ -165,12 +172,14 @@ case class CreateViewCommand(
       if (allowExisting) {
         // Handles `CREATE VIEW IF NOT EXISTS v0 AS SELECT ...`. Does nothing when the target view
         // already exists.
-      } else if (tableMetadata.tableType != CatalogTableType.VIEW) {
-        throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError(name, replace)
+      } else if (!tableMetadata.isViewLike) {
+        throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError(
+          name.nameParts, replace)
       } else if (replace) {
         // Detect cyclic view reference on CREATE OR REPLACE VIEW.
         val viewIdent = tableMetadata.identifier
-        checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
+        val viewFullIdent = tableMetadata.fullIdent
+        checkCyclicViewReference(analyzedPlan, Seq(viewFullIdent), viewFullIdent)
 
         // uncache the cached data before replacing an exists view
         logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.")
@@ -186,7 +195,7 @@ case class CreateViewCommand(
       } else {
         // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already
         // exists.
-        throw QueryCompilationErrors.viewAlreadyExistsError(name)
+        throw QueryCompilationErrors.viewAlreadyExistsError(name.nameParts)
       }
     } else {
       // Create the view if it doesn't exist.
@@ -209,6 +218,12 @@ case class CreateViewCommand(
  * this command will try to alter a temporary view first, if view not exist, try permanent view
  * next, if still not exist, throw an exception.
  *
+ * Note: this is the v1 (session catalog) path. Permanent-view checks (no temp-object refs,
+ * no auto-generated aliases, no cycles) run at exec time here because Dataset-built commands
+ * can be constructed with `isAnalyzed=true` and bypass the analyzer's recapture path. The v2
+ * equivalent is [[org.apache.spark.sql.catalyst.plans.logical.AlterViewAs]]; its checks run at
+ * analysis time via [[CheckViewReferences]]. Mirror any new validation in both places.
+ *
  * @param name the name of this view.
  * @param originalText the original SQL text of this view. Note that we can only alter a view by
  *                     SQL API, which means we always have originalText.
@@ -242,8 +257,8 @@ case class AlterViewAsCommand(
 
   override def run(session: SparkSession): Seq[Row] = {
     val isTemporary = session.sessionState.catalog.isTempView(name)
-    verifyTemporaryObjectsNotExists(isTemporary, name, query, referredTempFunctions)
-    verifyAutoGeneratedAliasesNotExists(query, isTemporary, name)
+    verifyTemporaryObjectsNotExists(isTemporary, name.nameParts, query, referredTempFunctions)
+    verifyAutoGeneratedAliasesNotExists(query, isTemporary, name.nameParts)
     SchemaUtils.checkIndeterminateCollationInSchema(query.schema)
     if (isTemporary) {
       alterTemporaryView(session, query)
@@ -277,7 +292,8 @@ case class AlterViewAsCommand(
 
     // Detect cyclic view reference on ALTER VIEW.
     val viewIdent = viewMeta.identifier
-    checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
+    val viewFullIdent = viewMeta.fullIdent
+    checkCyclicViewReference(analyzedPlan, Seq(viewFullIdent), viewFullIdent)
 
     logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.")
     CommandUtils.uncacheTableOrView(session, viewIdent)
@@ -559,16 +575,16 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
    *
    * @param plan the logical plan we detect cyclic view references from.
    * @param path the path between the altered view and current node.
-   * @param viewIdent the table identifier of the altered view, we compare two views by the
-   *                  `desc.identifier`.
+   * @param viewIdent the full multi-part identifier of the altered view. We compare two views by
+   *                  `desc.fullIdent` so multi-level namespaces (v2 catalogs) are distinguished.
    */
   def checkCyclicViewReference(
       plan: LogicalPlan,
-      path: Seq[TableIdentifier],
-      viewIdent: TableIdentifier): Unit = {
+      path: Seq[Seq[String]],
+      viewIdent: Seq[String]): Unit = {
     plan match {
       case v: View =>
-        val ident = v.desc.identifier
+        val ident = v.desc.fullIdent
         val newPath = path :+ ident
         // If the table identifier equals to the `viewIdent`, current view node is the same with
         // the altered view. We detect a view reference cycle, should throw an AnalysisException.
@@ -594,12 +610,13 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
   }
 
   def verifyAutoGeneratedAliasesNotExists(
-      child: LogicalPlan, isTemporary: Boolean, name: TableIdentifier): Unit = {
+      child: LogicalPlan, isTemporary: Boolean, viewNameParts: Seq[String]): Unit = {
     if (!isTemporary && !conf.allowAutoGeneratedAliasForView) {
       child.output.foreach { attr =>
         if (attr.metadata.contains("__autoGeneratedAlias")) {
           throw QueryCompilationErrors
-            .notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError(name, attr)
+            .notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError(
+              viewNameParts, attr)
         }
       }
     }
@@ -610,7 +627,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
    */
   def verifyTemporaryObjectsNotExists(
       isTemporary: Boolean,
-      name: TableIdentifier,
+      viewNameParts: Seq[String],
       child: LogicalPlan,
       referredTempFunctions: Seq[String]): Unit = {
     import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
@@ -618,16 +635,16 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
       val tempViews = collectTemporaryViews(child)
       tempViews.foreach { nameParts =>
         throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempViewError(
-          name, nameParts.quoted)
+          viewNameParts, nameParts.quoted)
       }
       referredTempFunctions.foreach { funcName =>
         throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempFuncError(
-          name, funcName)
+          viewNameParts, funcName)
       }
       val tempVars = collectTemporaryVariables(child)
       tempVars.foreach { nameParts =>
         throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempVarError(
-          name.nameParts, nameParts)
+          viewNameParts, nameParts)
       }
     }
   }
@@ -704,7 +721,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
       if (!storeAnalyzedPlanForView) {
         // Skip cyclic check because when stored analyzed plan for view, the depended
         // view is already converted to the underlying tables. So no cyclic views.
-        checkCyclicViewReference(analyzedPlan, Seq(name), name)
+        checkCyclicViewReference(analyzedPlan, Seq(name.nameParts), name.nameParts)
       }
       CommandUtils.uncacheTableOrView(session, name)
     }
@@ -836,24 +853,23 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
     if (originalText.isEmpty) {
       throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError()
     }
+    // For metric views, preserve the per-column metadata (`metric_view.type` / `metric_view.expr`)
+    // that the analyzer attaches to each dimension/measure `Alias`, even when the user supplies
+    // column names with comments.
     val aliasedSchema = CharVarcharUtils.getRawSchema(
-      aliasPlan(session, analyzedPlan, userSpecifiedColumns).schema, session.sessionState.conf)
+      aliasPlan(session, analyzedPlan, userSpecifiedColumns, retainMetadata = isMetricView).schema,
+      session.sessionState.conf)
     val newProperties = generateViewProperties(
       properties, session, analyzedPlan.schema.fieldNames, aliasedSchema.fieldNames, viewSchemaMode)
 
-    // Add property to indicate if this is a metric view
-    val finalProperties = if (isMetricView) {
-      newProperties + (CatalogTable.VIEW_WITH_METRICS -> "true")
-    } else {
-      newProperties
-    }
+    val tableType = if (isMetricView) CatalogTableType.METRIC_VIEW else CatalogTableType.VIEW
 
     CatalogTable(
       identifier = name,
-      tableType = CatalogTableType.VIEW,
+      tableType = tableType,
       storage = CatalogStorageFormat.empty,
       schema = aliasedSchema,
-      properties = finalProperties,
+      properties = newProperties,
       viewOriginalText = originalText,
       viewText = originalText,
       comment = comment,
@@ -864,21 +880,101 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig {
   /**
    * If `userSpecifiedColumns` is defined, alias the analyzed plan to the user specified columns,
    * else return the analyzed plan directly.
+   *
+   * When `retainMetadata` is true, any existing column metadata on the analyzed attribute
+   * (for example the `metric_view.type` / `metric_view.expr` keys the analyzer attaches to
+   * metric-view columns) is preserved in the re-aliased projection. The no-comment branch
+   * already preserves `attr.metadata` transitively via `child.metadata` on the new `Alias`;
+   * the comment branch needs an explicit merge because it sets `explicitMetadata` to a
+   * freshly constructed metadata object.
    */
   def aliasPlan(
       session: SparkSession,
       analyzedPlan: LogicalPlan,
-      userSpecifiedColumns: Seq[(String, Option[String])]): LogicalPlan = {
+      userSpecifiedColumns: Seq[(String, Option[String])],
+      retainMetadata: Boolean = false): LogicalPlan = {
     if (userSpecifiedColumns.isEmpty) {
       analyzedPlan
     } else {
       val projectList = analyzedPlan.output.zip(userSpecifiedColumns).map {
         case (attr, (colName, None)) => Alias(attr, colName)()
         case (attr, (colName, Some(colComment))) =>
-          val meta = new MetadataBuilder().putString("comment", colComment).build()
+          val builder = new MetadataBuilder()
+          if (retainMetadata) {
+            builder.withMetadata(attr.metadata)
+          }
+          val meta = builder.putString("comment", colComment).build()
           Alias(attr, colName)(explicitMetadata = Some(meta))
       }
       session.sessionState.executePlan(Project(projectList, analyzedPlan)).analyzed
     }
   }
 }
+
+/**
+ * Post-analysis check for v2 CREATE VIEW / ALTER VIEW. First rejects catalogs that do not
+ * implement [[ViewCatalog]] with `MISSING_CATALOG_ABILITY.VIEWS` -- we do this before the
+ * temp-object and auto-alias checks so a catalog that cannot host views at all surfaces the
+ * correct root cause instead of a misleading "references temp" error. Then rejects permanent
+ * views that reference temporary objects and view bodies with auto-generated aliases.
+ * `referredTempFunctions` is captured by the command's `markAsAnalyzed` before this rule runs.
+ * The v1 counterparts [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing
+ * exec-time checks -- Dataset-built commands bypass the analyzer's re-capture path, so the
+ * exec-time safety net must stay for v1.
+ */
+object CheckViewReferences extends (LogicalPlan => Unit) {
+  import ViewHelper._
+
+  // Extract (catalog, identifier) for the two resolved shapes view commands reach us with:
+  // `ResolvedIdentifier` for CREATE VIEW, `ResolvedPersistentView` for ALTER VIEW. Other shapes
+  // are an analyzer bug.
+  private def catalogAndIdent(resolved: LogicalPlan): (CatalogPlugin, Identifier) =
+    resolved match {
+      case ri: ResolvedIdentifier => (ri.catalog, ri.identifier)
+      case rpv: ResolvedPersistentView => (rpv.catalog, rpv.identifier)
+      case other =>
+        throw SparkException.internalError(
+          s"Unexpected child of view command: ${other.getClass.getName}")
+    }
+
+  private def fullIdentFor(resolved: LogicalPlan): Seq[String] = {
+    val (catalog, ident) = catalogAndIdent(resolved)
+    catalog.name() +: ident.asMultipartIdentifier
+  }
+
+  // Fail fast if the catalog cannot host views. Gate non-ViewCatalog plugins here so callers
+  // get the VIEWS-specific error rather than a generic cast failure later.
+  private def requireViewCatalog(resolved: LogicalPlan): Unit = {
+    val (catalog, _) = catalogAndIdent(resolved)
+    if (!catalog.isInstanceOf[ViewCatalog]) {
+      throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog)
+    }
+  }
+
+  override def apply(plan: LogicalPlan): Unit = plan.foreach {
+    case cv: CreateView if cv.isAnalyzed =>
+      requireViewCatalog(cv.child)
+      val fullIdent = fullIdentFor(cv.child)
+      verifyTemporaryObjectsNotExists(
+        isTemporary = false, fullIdent, cv.query, cv.referredTempFunctions)
+      verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, fullIdent)
+      // Cycles can only form when REPLACE'ing an existing view; a plain CREATE against an
+      // existing view fails earlier with `viewAlreadyExistsError` and against a non-existent
+      // view has nothing to cycle with.
+      if (cv.replace) {
+        checkCyclicViewReference(cv.query, Seq(fullIdent), fullIdent)
+      }
+
+    case av: AlterViewAs if av.isAnalyzed =>
+      // No capability check here: `Analyzer.lookupTableOrView(identifier, viewOnly=true)`
+      // already rejects non-ViewCatalog catalogs upstream for `UnresolvedView`, so by the time
+      // an AlterViewAs reaches this rule the catalog is guaranteed to be a ViewCatalog.
+      val fullIdent = fullIdentFor(av.child)
+      verifyTemporaryObjectsNotExists(
+        isTemporary = false, fullIdent, av.query, av.referredTempFunctions)
+      verifyAutoGeneratedAliasesNotExists(av.query, isTemporary = false, fullIdent)
+      checkCyclicViewReference(av.query, Seq(fullIdent), fullIdent)
+
+    case _ =>
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 9b51d3763abba..4a95f681fb6e5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -420,6 +420,11 @@ case class DataSource(
           catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog &&
           catalogTable.get.partitionColumnNames.nonEmpty
         val (fileCatalog, dataSchema, partitionSchema) = if (useCatalogFileIndex) {
+          if (caseInsensitiveOptions.getOrElse(
+              FileIndexOptions.RECURSIVE_FILE_LOOKUP, "false").toBoolean) {
+            throw QueryCompilationErrors
+              .recursiveFileLookupNotSupportedForPartitionedDataSourceError()
+          }
           val defaultTableSize = conf.defaultSizeInBytes
           val index = new CatalogFileIndex(
             sparkSession,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
index 8a254b464da71..42ed6d782e34b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
@@ -231,6 +231,11 @@ trait FileFormat {
    *
    * NOTE: Extractors are lazy, invoked only if the query actually selects their column at runtime.
    *
+   * Return types: extractors may return either a raw value (which is converted to the column's
+   * catalyst form via [[Literal.create]]) or an already-built [[Literal]] (whose `.value` is
+   * used directly). For complex types ([[ArrayType]] / [[MapType]] / [[StructType]]), return the
+   * value in catalyst form ([[ArrayData]] / [[MapData]] / [[InternalRow]]).
+   *
    * See also [[FileFormat.getFileConstantMetadataColumnValue]].
    */
   def fileConstantMetadataExtractors: Map[String, PartitionedFile => Any] =
@@ -273,6 +278,9 @@ object FileFormat {
     FileSourceConstantMetadataStructField(FILE_BLOCK_LENGTH, LongType, nullable = false),
     FileSourceConstantMetadataStructField(FILE_MODIFICATION_TIME, TimestampType, nullable = false))
 
+  private val BASE_METADATA_NAME_TO_TYPE: Map[String, DataType] =
+    BASE_METADATA_FIELDS.map(f => f.name -> f.dataType).toMap
+
   /**
    * All [[BASE_METADATA_FIELDS]] require custom extractors because they are derived directly from
    * fields of the [[PartitionedFile]], and do have entries in the file's metadata map.
@@ -299,16 +307,26 @@ object FileFormat {
    * If an extractor is available, apply it. Otherwise, look up the column's name in the file's
    * column value map and return the result (or null, if not found).
    *
-   * Raw values (including null) are automatically converted to literals as a courtesy.
+   * Raw values (including null) are converted via [[Literal.create]], which accepts catalyst-form
+   * values directly. This lets a complex constant metadata column return an [[ArrayData]] /
+   * [[MapData]] / [[InternalRow]] whose element types only the caller knows. If the extractor
+   * returns an already-built [[Literal]] (allowed by the extractor contract), its value is
+   * unwrapped before delegating to [[Literal.create]] so the dataType validation in the
+   * case-class constructor is checked against the raw value.
    */
   def getFileConstantMetadataColumnValue(
       name: String,
       file: PartitionedFile,
-      metadataExtractors: Map[String, PartitionedFile => Any]): Literal = {
+      metadataExtractors: Map[String, PartitionedFile => Any],
+      dataType: DataType): Literal = {
     val extractor = metadataExtractors.getOrElse(name,
       { pf: PartitionedFile => pf.otherConstantMetadataColumnValues.get(name).orNull }
     )
-    Literal(extractor.apply(file))
+    val rawValue = extractor.apply(file) match {
+      case lit: Literal => lit.value
+      case other => other
+    }
+    Literal.create(rawValue, dataType)
   }
 
   // create an internal row given required metadata fields and file information
@@ -334,7 +352,9 @@ object FileFormat {
       modificationTime = fileModificationTime,
       fileSize = fileSize,
       otherConstantMetadataColumnValues = Map.empty)
-    updateMetadataInternalRow(new GenericInternalRow(fieldNames.length), fieldNames, pf, extractors)
+    val fieldDataTypes = fieldNames.map(BASE_METADATA_NAME_TO_TYPE)
+    updateMetadataInternalRow(
+      new GenericInternalRow(fieldNames.length), fieldNames, pf, extractors, fieldDataTypes)
   }
 
   // update an internal row given required metadata fields and file information
@@ -342,9 +362,12 @@ object FileFormat {
       row: InternalRow,
       fieldNames: Seq[String],
       file: PartitionedFile,
-      metadataExtractors: Map[String, PartitionedFile => Any]): InternalRow = {
+      metadataExtractors: Map[String, PartitionedFile => Any],
+      fieldDataTypes: Seq[DataType]): InternalRow = {
+    require(fieldDataTypes.length == fieldNames.length,
+      s"fieldDataTypes length ${fieldDataTypes.length} != fieldNames length ${fieldNames.length}")
     fieldNames.zipWithIndex.foreach { case (name, i) =>
-      getFileConstantMetadataColumnValue(name, file, metadataExtractors) match {
+      getFileConstantMetadataColumnValue(name, file, metadataExtractors, fieldDataTypes(i)) match {
         case Literal(null, _) => row.setNullAt(i)
         case literal => row.update(i, literal.value)
       }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
index 5dc13ccee9ce0..b591573c00afe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
@@ -27,12 +27,12 @@ import org.apache.hadoop.security.AccessControlException
 import org.apache.spark.{Partition => RDDPartition, TaskContext}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.LogKeys.{CURRENT_FILE, PATH}
+import org.apache.spark.memory.MemoryMode
 import org.apache.spark.paths.SparkPath
 import org.apache.spark.rdd.{InputFileBlockHolder, RDD}
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.{FileSourceOptions, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GenericInternalRow, JoinedRow, Literal, UnsafeProjection, UnsafeRow}
-import org.apache.spark.sql.catalyst.types.PhysicalDataType
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.execution.datasources.FileFormat._
 import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2
@@ -89,6 +89,14 @@ class FileScanRDD(
 
   private val ignoreCorruptFiles = options.ignoreCorruptFiles
   private val ignoreMissingFiles = options.ignoreMissingFiles
+  // Evaluated on the driver (sparkSession is @transient) and serialized to executors so the
+  // `compute` iterator below can pass it through to ColumnVectorUtils.populate.
+  private val memoryMode: MemoryMode =
+    if (sparkSession.sessionState.conf.offHeapColumnVectorEnabled) {
+      MemoryMode.OFF_HEAP
+    } else {
+      MemoryMode.ON_HEAP
+    }
 
   override def compute(split: RDDPartition, context: TaskContext): Iterator[InternalRow] = {
     val iterator = new Iterator[Object] with AutoCloseable {
@@ -164,7 +172,8 @@ class FileScanRDD(
       private def updateMetadataRow(): Unit =
         if (metadataColumns.nonEmpty && currentFile != null) {
           updateMetadataInternalRow(
-            metadataRow, metadataColumns.map(_.name), currentFile, metadataExtractors)
+            metadataRow, metadataColumns.map(_.name), currentFile, metadataExtractors,
+            metadataColumns.map(_.dataType))
         }
 
       /**
@@ -174,16 +183,16 @@ class FileScanRDD(
         val tmpRow = new GenericInternalRow(1)
         metadataColumns.map { attr =>
           // Populate each metadata column by passing the resulting value through `tmpRow`.
-          getFileConstantMetadataColumnValue(attr.name, currentFile, metadataExtractors) match {
+          getFileConstantMetadataColumnValue(
+              attr.name, currentFile, metadataExtractors, attr.dataType) match {
             case Literal(null, _) =>
               tmpRow.setNullAt(0)
             case literal =>
-              require(PhysicalDataType(attr.dataType) == PhysicalDataType(literal.dataType))
               tmpRow.update(0, literal.value)
           }
 
           val columnVector = new ConstantColumnVector(c.numRows(), attr.dataType)
-          ColumnVectorUtils.populate(columnVector, tmpRow, 0)
+          ColumnVectorUtils.populate(columnVector, tmpRow, 0, memoryMode)
           columnVector
         }.toArray
       }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
index 1bf0d2f0301f2..8cea2c95e6940 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.{expressions, InternalRow}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.ArrayImplicits._
 
@@ -89,8 +90,7 @@ abstract class PartitioningAwareFileIndex(
         PartitionDirectory(InternalRow.empty, allFiles().toArray.filter(isNonEmptyFile))) :: Nil
     } else {
       if (recursiveFileLookup) {
-        throw new IllegalArgumentException(
-          "Datasource with partition do not allow recursive file loading.")
+        throw QueryCompilationErrors.recursiveFileLookupNotSupportedForPartitionedDataSourceError()
       }
       prunePartitions(partitionFilters, partitionSpec()).map {
         case PartitionPath(values, path) =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala
index 1b23fd1a5e829..202599a819d8d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala
@@ -82,7 +82,7 @@ object SchemaPruning extends Rule[LogicalPlan] {
       val metadataSchema =
         relation.output.collect { case FileSourceMetadataAttribute(attr) => attr }.toStructType
       val prunedMetadataSchema = if (metadataSchema.nonEmpty) {
-        pruneSchema(metadataSchema, requestedRootFields)
+        pruneMetadataSchema(metadataSchema, requestedRootFields)
       } else {
         metadataSchema
       }
@@ -114,6 +114,44 @@ object SchemaPruning extends Rule[LogicalPlan] {
       fsRelation.fileFormat.isInstanceOf[ParquetFileFormat] ||
         fsRelation.fileFormat.isInstanceOf[OrcFileFormat])
 
+  /**
+   * Prunes a file-source metadata schema (one `StructType` containing each
+   * `FileSourceMetadataAttribute`). Unlike pruning a data file schema, this only prunes
+   * unused sibling sub-attributes (each is its own per-field extractor); kept sub-attributes'
+   * data types are preserved verbatim because the extractor produces a complete catalyst
+   * value, and shaving fields out would shift positions in that value.
+   */
+  private def pruneMetadataSchema(
+      metadataSchema: StructType,
+      requestedRootFields: Seq[RootField]): StructType = {
+    val resolver = conf.resolver
+    StructType(metadataSchema.map { topField =>
+      topField.dataType match {
+        case innerStruct: StructType =>
+          // Collect the requested sub-attribute names for this metadata attribute from the
+          // root field tree. Anything below those sub-attributes (e.g. nested struct/array
+          // element fields) is ignored, since extractor outputs aren't pruned.
+          val requestedSubNames: Set[String] = requestedRootFields.collect {
+            case rf if resolver(rf.field.name, topField.name) =>
+              rf.field.dataType match {
+                case rs: StructType => rs.fieldNames.toSet
+                case _ => Set.empty[String]
+              }
+          }.flatten.toSet
+          val keptSubFields = innerStruct.fields.filter { sub =>
+            requestedSubNames.exists(name => resolver(name, sub.name))
+          }
+          if (keptSubFields.length == innerStruct.fields.length) {
+            // Nothing to prune for this attribute; keep the original.
+            topField
+          } else {
+            topField.copy(dataType = StructType(keptSubFields))
+          }
+        case _ => topField
+      }
+    })
+  }
+
   /**
    * Normalizes the names of the attribute references in the given expressions to reflect
    * the names in the given logical relation. This makes it possible to compare attributes and
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 16b25a9e6f709..425f98cad031f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -31,7 +31,6 @@ import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.datasources.{DataSourceMetricsMixin, ExternalEngineDatasourceRDD}
-import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
 import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects}
 import org.apache.spark.sql.types._
@@ -141,7 +140,7 @@ object JDBCRDD extends Logging {
    * @param options - JDBC options that contains url, table and other information.
    * @param outputSchema - The schema of the columns or aggregate columns to SELECT.
    * @param groupByColumns - The pushed down group by columns.
-   * @param sample - The pushed down tableSample.
+   * @param sampleClause - The pushed down table sample SQL clause.
    * @param limit - The pushed down limit. If the value is 0, it means no limit or limit
    *                is not pushed down.
    * @param sortOrders - The sort orders cooperates with limit to realize top N.
@@ -158,7 +157,7 @@ object JDBCRDD extends Logging {
       options: JDBCOptions,
       outputSchema: Option[StructType] = None,
       groupByColumns: Option[Array[String]] = None,
-      sample: Option[TableSampleInfo] = None,
+      sampleClause: Option[String] = None,
       limit: Int = 0,
       sortOrders: Array[String] = Array.empty[String],
       offset: Int = 0,
@@ -184,7 +183,7 @@ object JDBCRDD extends Logging {
       options,
       databaseMetadata = JDBCDatabaseMetadata.fromJDBCConnectionFactory(connectionFactory),
       groupByColumns,
-      sample,
+      sampleClause,
       limit,
       sortOrders,
       offset,
@@ -209,7 +208,7 @@ class JDBCRDD(
     options: JDBCOptions,
     databaseMetadata: JDBCDatabaseMetadata,
     groupByColumns: Option[Array[String]],
-    sample: Option[TableSampleInfo],
+    sampleClause: Option[String],
     limit: Int,
     sortOrders: Array[String],
     offset: Int,
@@ -252,8 +251,8 @@ class JDBCRDD(
       builder = builder.withGroupByColumns(groupByKeys)
     }
 
-    sample.foreach { tableSampleInfo =>
-      builder = builder.withTableSample(tableSampleInfo)
+    sampleClause.foreach { clause =>
+      builder = builder.withTableSampleClause(clause)
     }
 
     builder.build()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
index 05e30207314a7..972bb3e35ee6f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
@@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, Timesta
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp}
 import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.errors.QueryCompilationErrors
-import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
 import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.jdbc.JdbcDialects
@@ -314,7 +313,7 @@ private[sql] case class JDBCRelation(
       finalSchema: StructType,
       predicates: Array[Predicate],
       groupByColumns: Option[Array[String]],
-      tableSample: Option[TableSampleInfo],
+      tableSampleClause: Option[String],
       limit: Int,
       sortOrders: Array[String],
       offset: Int): RDD[Row] = {
@@ -328,7 +327,7 @@ private[sql] case class JDBCRelation(
       jdbcOptions,
       Some(finalSchema),
       groupByColumns,
-      tableSample,
+      tableSampleClause,
       limit,
       sortOrders,
       offset,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index ab98f55e72296..a00618a1e2e24 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -516,7 +516,8 @@ object ParquetFileFormat extends Logging {
       partFiles: Seq[FileStatus],
       ignoreCorruptFiles: Boolean,
       ignoreMissingFiles: Boolean = false): Seq[Footer] = {
-    ThreadUtils.parmap(partFiles, "readingParquetFooters", 8) { currentFile =>
+    ThreadUtils.parmap(partFiles, "readingParquetFooters", 8,
+        preserveSparkThrowable = true) { currentFile =>
       try {
         // Skips row group information since we only need the schema.
         // ParquetFileReader.readFooter throws RuntimeException, instead of IOException,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
index f253cbd0d0d3d..85cc504573231 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
@@ -43,7 +43,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.datasources.{DataSourceUtils, VariantMetadata}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal}
 import org.apache.spark.util.collection.Utils
 
 /**
@@ -415,8 +415,8 @@ private[parquet] class ParquetRowConverter(
       case geom: GeometryType =>
         new ParquetGeometryConverter(geom.srid, updater)
 
-      case _: GeographyType =>
-        new ParquetGeographyConverter(updater)
+      case geog: GeographyType =>
+        new ParquetGeographyConverter(geog.srid, updater)
 
       // As long as the parquet type is INT64 timestamp, whether logical annotation
       // `isAdjustedToUTC` is false or true, it will be read as Spark's TimestampLTZ type
@@ -619,12 +619,12 @@ private[parquet] class ParquetRowConverter(
   }
 
   /**
-   * Parquet converter for strings. A dictionary is used to minimize string decoding cost.
+   * Parquet converter for geometries. A dictionary is used to minimize WKB decoding cost.
    */
   private final class ParquetGeometryConverter(srid: Int, updater: ParentContainerUpdater)
       extends ParquetPrimitiveConverter(updater) {
 
-    private var expandedDictionary: Array[GeometryVal] = null
+    private var expandedDictionary: Array[BinaryView] = null
 
     override def hasDictionarySupport: Boolean = true
 
@@ -655,18 +655,18 @@ private[parquet] class ParquetRowConverter(
   }
 
   /**
-   * Parquet converter for strings. A dictionary is used to minimize string decoding cost.
+   * Parquet converter for geographies. A dictionary is used to minimize WKB decoding cost.
    */
-  private final class ParquetGeographyConverter(updater: ParentContainerUpdater)
+  private final class ParquetGeographyConverter(srid: Int, updater: ParentContainerUpdater)
       extends ParquetPrimitiveConverter(updater) {
 
-    private var expandedDictionary: Array[GeographyVal] = null
+    private var expandedDictionary: Array[BinaryView] = null
 
     override def hasDictionarySupport: Boolean = true
 
     override def setDictionary(dictionary: Dictionary): Unit = {
       this.expandedDictionary = Array.tabulate(dictionary.getMaxId + 1) { i =>
-        STUtils.stGeogFromWKB(dictionary.decodeToBinary(i).getBytesUnsafe)
+        STUtils.stGeogFromWKB(dictionary.decodeToBinary(i).getBytesUnsafe, srid)
       }
     }
 
@@ -678,15 +678,15 @@ private[parquet] class ParquetRowConverter(
       val buffer = value.toByteBuffer
       val numBytes = buffer.remaining()
 
-      val geometry = if (buffer.hasArray) {
+      val geography = if (buffer.hasArray) {
         val array = buffer.array()
         val offset = buffer.arrayOffset() + buffer.position()
-        STUtils.stGeogFromWKB(array.slice(offset, offset + numBytes))
+        STUtils.stGeogFromWKB(array.slice(offset, offset + numBytes), srid)
       } else {
-        STUtils.stGeogFromWKB(value.getBytesUnsafe)
+        STUtils.stGeogFromWKB(value.getBytesUnsafe, srid)
       }
 
-      updater.set(geometry)
+      updater.set(geography)
     }
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala
index b84148992e32b..d7c9a24245e5f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala
@@ -280,14 +280,14 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging {
         (row: SpecializedGetters, ordinal: Int) =>
           // Data is written to Parquet using the WKB format, as per spec:
           // https://parquet.apache.org/docs/file-format/types/geospatial/.
-          val wkb = STUtils.stAsBinary(row.getGeometry(ordinal))
+          val wkb = STUtils.stGeomAsBinary(row.getBinaryView(ordinal))
           recordConsumer.addBinary(Binary.fromReusedByteArray(wkb))
 
       case _: GeographyType =>
         (row: SpecializedGetters, ordinal: Int) =>
           // Data is written to Parquet using the WKB format, as per spec:
           // https://parquet.apache.org/docs/file-format/types/geospatial/.
-          val wkb = STUtils.stAsBinary(row.getGeography(ordinal))
+          val wkb = STUtils.stGeogAsBinary(row.getBinaryView(ordinal))
           recordConsumer.addBinary(Binary.fromReusedByteArray(wkb))
 
       case DecimalType.Fixed(precision, scale) =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index d1f61599e7ac8..7122dd52ef1a5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -40,7 +40,7 @@ import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1}
 import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.sources.InsertableRelation
-import org.apache.spark.sql.types.{ArrayType, DataType, MapType, MetadataBuilder, StructField, StructType}
+import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType}
 import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec
 import org.apache.spark.sql.util.SchemaUtils
 import org.apache.spark.util.ArrayImplicits._
@@ -169,7 +169,7 @@ case class PreprocessTableCreation(catalog: SessionCatalog) extends Rule[Logical
       val tableName = tableIdentWithDB.unquotedString
       val existingTable = catalog.getTableMetadata(tableIdentWithDB)
 
-      if (existingTable.tableType == CatalogTableType.VIEW) {
+      if (existingTable.isViewLike) {
         throw QueryCompilationErrors.saveDataIntoViewNotAllowedError()
       }
 
@@ -528,7 +528,7 @@ object PreprocessTableInsertion extends ResolveInsertionBase {
         query,
         byName,
         conf,
-        supportColDefaultValue = true)
+        TableOutputResolver.DefaultValueFillMode.FILL)
     } catch {
       case e: AnalysisException if staticPartCols.nonEmpty &&
         (e.getCondition == "INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS" ||
@@ -737,19 +737,6 @@ case class QualifyLocationWithWarehouse(catalog: SessionCatalog) extends Rule[Lo
  * It does so by walking the resolved plan looking for View operators for persisted views.
  */
 object ViewSyncSchemaToMetaStore extends (LogicalPlan => Unit) {
-
-  /**
-   * Checks if comment changes between view and table should trigger schema sync.
-   * When preserveUserComments flag is enabled, comment differences should NOT trigger sync
-   * because we want to preserve user-set view comments.
-   */
-  private def shouldTriggerRedoOnCommentChange(
-      viewField: StructField,
-      tableField: StructField,
-      preserveUserComments: Boolean): Boolean = {
-    !preserveUserComments && viewField.getComment() != tableField.getComment()
-  }
-
   def apply(plan: LogicalPlan): Unit = {
     plan.foreach {
       case View(metaData, false, viewQuery, _)
@@ -768,44 +755,19 @@ object ViewSyncSchemaToMetaStore extends (LogicalPlan => Unit) {
           (field.dataType != planField.dataType ||
             field.nullable != planField.nullable ||
             (viewSchemaMode == SchemaEvolution && (
-              field.name != planField.name ||
-                shouldTriggerRedoOnCommentChange(
-                  field,
-                  planField,
-                  session.sessionState.conf.viewSchemaEvolutionPreserveUserComments))))
+              field.getComment() != planField.getComment() ||
+              field.name != planField.name)))
         }
 
-        lazy val viewFieldsByName = viewFields.map(f => f.name -> f).toMap
-
         if (redo) {
           val newSchema = if (viewSchemaMode == SchemaTypeEvolution) {
             val newFields = viewQuery.schema.map {
               case StructField(name, dataType, nullable, _) =>
                 StructField(name, dataType, nullable,
-                  viewFieldsByName(name).metadata)
-            }
-            StructType(newFields)
-          } else if (session.sessionState.conf.viewSchemaEvolutionPreserveUserComments) {
-            // Adopt types/nullable/names from query, but preserve view comments.
-            val newFields = viewQuery.schema.map { planField =>
-              val newMetadata = viewFieldsByName.get(planField.name) match {
-                case Some(viewField) =>
-                  // Use table metadata but override with view comment
-                  val builder = new MetadataBuilder().withMetadata(planField.metadata)
-                  viewField.getComment() match {
-                    case Some(comment) => builder.putString("comment", comment)
-                    case None => builder.remove("comment")
-                  }
-                  builder.build()
-                case None =>
-                  // New column, use table metadata as-is
-                  planField.metadata
-              }
-              StructField(planField.name, planField.dataType, planField.nullable, newMetadata)
+                  viewFields.find(_.name == name).get.metadata)
             }
             StructType(newFields)
           } else {
-            // Legacy behavior: adopt everything from table including comments.
             viewQuery.schema
           }
           SchemaUtils.checkColumnNameDuplication(fieldNames.toImmutableArraySeq,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala
new file mode 100644
index 0000000000000..2309cb31b5ebe
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaEvolution, ViewSchemaMode}
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, TableCatalog, ViewCatalog, ViewInfo}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, MultipartIdentifierHelper}
+import org.apache.spark.sql.execution.command.CommandUtils
+
+/**
+ * Shared bits for the v2 ALTER VIEW ... AS exec. The replacement [[ViewInfo]] is constructed by
+ * [[V2ViewPreparation.buildViewInfo]]; the existing view's payload is provided at analysis time
+ * via the `existingView` field so we can preserve user-set TBLPROPERTIES, comment, collation,
+ * owner, and schema binding mode without re-loading at runtime.
+ *
+ * Transient fields (SQL configs, query column names) are re-captured from the current session,
+ * matching v1 `AlterViewAsCommand.alterPermanentView`. PROP_OWNER and user TBLPROPERTIES flow
+ * through unchanged. If the view has been dropped or replaced with a non-view table between
+ * analysis and exec, the catalog's `replaceView` surfaces `NoSuchViewException` and the error
+ * propagates.
+ */
+private[v2] trait V2AlterViewPreparation extends V2ViewPreparation {
+  protected def existingView: ViewInfo
+
+  protected lazy val existingProps: Map[String, String] =
+    existingView.properties.asScala.toMap
+
+  private def existingProp(key: String): Option[String] = existingProps.get(key)
+
+  // ALTER VIEW ... AS does not accept a user column list.
+  override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty
+  override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT)
+  override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION)
+  // Preserve the existing view's owner (v1-parity with AlterViewAsCommand's viewMeta.copy,
+  // which leaves `owner` untouched). If the existing view has no PROP_OWNER, pass it through
+  // as None so the replacement ViewInfo also has no owner.
+  override def owner: Option[String] = existingProp(TableCatalog.PROP_OWNER)
+  override def userProperties: Map[String, String] = existingProps
+
+  // Preserve the existing view's schema binding mode. Reuse `viewSchemaModeFromProperties`
+  // for a v1-identical decode -- it honors `viewSchemaBindingEnabled` and defaults missing
+  // values to SchemaBinding. We feed the typed `ViewInfo.schemaMode` String in via a
+  // single-key map so the decode logic stays in one place.
+  override def viewSchemaMode: ViewSchemaMode =
+    CatalogTable.viewSchemaModeFromProperties(
+      Option(existingView.schemaMode)
+        .map(CatalogTable.VIEW_SCHEMA_MODE -> _)
+        .toMap)
+}
+
+/**
+ * Physical plan node for ALTER VIEW ... AS on a v2 [[ViewCatalog]]. Dispatches to
+ * [[ViewCatalog#replaceView]], which is contractually atomic.
+ */
+case class AlterV2ViewExec(
+    catalog: ViewCatalog,
+    identifier: Identifier,
+    existingView: ViewInfo,
+    originalText: String,
+    query: LogicalPlan) extends V2AlterViewPreparation {
+
+  override protected def run(): Seq[InternalRow] = {
+    val info = buildViewInfo()
+    // Cyclic reference detection is done at analysis time in CheckViewReferences.
+    CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier))
+    catalog.replaceView(identifier, info)
+    Seq.empty
+  }
+}
+
+/**
+ * Physical plan node for ALTER VIEW ... SET TBLPROPERTIES on a v2 [[ViewCatalog]]. Merges the
+ * user-supplied properties on top of the analysis-time view properties and dispatches to
+ * [[ViewCatalog#replaceView]] -- views carry no data, so a single atomic-swap call is sufficient.
+ */
+case class AlterV2ViewSetPropertiesExec(
+    catalog: ViewCatalog,
+    identifier: Identifier,
+    existingView: ViewInfo,
+    properties: Map[String, String]) extends LeafV2CommandExec {
+
+  override def output: Seq[org.apache.spark.sql.catalyst.expressions.Attribute] = Seq.empty
+
+  override protected def run(): Seq[InternalRow] = {
+    val merged = existingView.properties.asScala.toMap ++ properties
+    val info = CatalogV2Util.viewInfoBuilderFrom(existingView)
+      .withProperties(merged.asJava)
+      .build()
+    // Match v1 `AlterTableSetPropertiesCommand`'s `invalidateCachedTable` so cached query
+    // plans referencing the view drop their stale entries.
+    CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier))
+    catalog.replaceView(identifier, info)
+    Seq.empty
+  }
+}
+
+/**
+ * Physical plan node for ALTER VIEW ... UNSET TBLPROPERTIES on a v2 [[ViewCatalog]]. Drops the
+ * listed property keys from the analysis-time view properties and dispatches to
+ * [[ViewCatalog#replaceView]]. Missing keys are silently dropped, matching v1
+ * `AlterTableUnsetPropertiesCommand` for views (`ifExists` is unused on the view path -- the
+ * v1 view command never errors on missing keys).
+ */
+case class AlterV2ViewUnsetPropertiesExec(
+    catalog: ViewCatalog,
+    identifier: Identifier,
+    existingView: ViewInfo,
+    propertyKeys: Seq[String]) extends LeafV2CommandExec {
+
+  override def output: Seq[org.apache.spark.sql.catalyst.expressions.Attribute] = Seq.empty
+
+  override protected def run(): Seq[InternalRow] = {
+    val remaining = existingView.properties.asScala.toMap -- propertyKeys
+    val info = CatalogV2Util.viewInfoBuilderFrom(existingView)
+      .withProperties(remaining.asJava)
+      .build()
+    CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier))
+    catalog.replaceView(identifier, info)
+    Seq.empty
+  }
+}
+
+/**
+ * Physical plan node for ALTER VIEW ... WITH SCHEMA BINDING on a v2 [[ViewCatalog]]. Replaces
+ * the schema-binding mode on the analysis-time view payload and dispatches to
+ * [[ViewCatalog#replaceView]]. The view body itself is not re-analyzed -- only the binding mode
+ * field changes. When the new mode is EVOLUTION, `queryColumnNames` is also cleared, mirroring
+ * v1 `generateViewProperties` -- in EVOLUTION mode the view always uses its current schema as
+ * the column source, so leaving stale `queryColumnNames` would produce non-canonical persisted
+ * metadata.
+ */
+case class AlterV2ViewSchemaBindingExec(
+    catalog: ViewCatalog,
+    identifier: Identifier,
+    existingView: ViewInfo,
+    viewSchemaMode: ViewSchemaMode) extends LeafV2CommandExec {
+
+  override def output: Seq[org.apache.spark.sql.catalyst.expressions.Attribute] = Seq.empty
+
+  override protected def run(): Seq[InternalRow] = {
+    val builder = CatalogV2Util.viewInfoBuilderFrom(existingView)
+      .withSchemaMode(viewSchemaMode.toString)
+    if (viewSchemaMode == SchemaEvolution) {
+      builder.withQueryColumnNames(Array.empty[String])
+    }
+    val info = builder.build()
+    CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier))
+    catalog.replaceView(identifier, info)
+    Seq.empty
+  }
+}
+
+/**
+ * Physical plan node for ALTER VIEW ... RENAME TO on a v2 [[ViewCatalog]]. Dispatches to
+ * [[ViewCatalog#renameView]]; if the source view is missing or has been replaced with a non-view
+ * table between analysis and exec, the catalog throws `NoSuchViewException` and the error
+ * propagates.
+ *
+ * If the view was cached at the old identifier, the cache entry is captured before the rename
+ * and re-instated at the new identifier afterwards -- matches v1 `AlterTableRenameCommand` and
+ * v2 `RenameTableExec`, so users on a v2 view catalog who explicitly cached a view do not
+ * silently lose the cache after a rename.
+ */
+case class RenameV2ViewExec(
+    catalog: ViewCatalog,
+    oldIdent: Identifier,
+    newIdent: Identifier) extends LeafV2CommandExec {
+
+  override def output: Seq[org.apache.spark.sql.catalyst.expressions.Attribute] = Seq.empty
+
+  override protected def run(): Seq[InternalRow] = {
+    // If the new identifier consists of a name only, rename in place within the source
+    // namespace -- matches `RenameTableExec`'s v1-parity behavior.
+    val qualifiedNewIdent = if (newIdent.namespace.isEmpty) {
+      Identifier.of(oldIdent.namespace, newIdent.name)
+    } else newIdent
+
+    // Capture the old view's storage level before uncaching, mirroring v1
+    // `AlterTableRenameCommand`. Resolving the old identifier via `session.table` runs through
+    // view-text expansion so the cache lookup keys off the same plan that was originally
+    // cached via `CACHE TABLE <view>`.
+    val oldQualified = (catalog.name() +: oldIdent.asMultipartIdentifier).quoted
+    val optStorageLevel = session.sharedState.cacheManager
+      .lookupCachedData(session.table(oldQualified))
+      .map(_.cachedRepresentation.cacheBuilder.storageLevel)
+
+    CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, oldIdent))
+    catalog.invalidateView(oldIdent)
+    catalog.renameView(oldIdent, qualifiedNewIdent)
+
+    optStorageLevel.foreach { storageLevel =>
+      val newQualified = (catalog.name() +: qualifiedNewIdent.asMultipartIdentifier).quoted
+      session.catalog.cacheTable(newQualified, storageLevel)
+    }
+    Seq.empty
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
index 28f4d12d366b2..5f18e76375210 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
@@ -28,6 +28,9 @@ import org.apache.spark.sql.catalyst.plans.physical.{KeyedPartitioning, SinglePa
 import org.apache.spark.sql.catalyst.util.{truncatedString, InternalRowComparableWrapper}
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.read._
+import org.apache.spark.sql.connector.write.RowLevelOperation.Command.DELETE
+import org.apache.spark.sql.connector.write.RowLevelOperationTable
+import org.apache.spark.sql.execution.metric.{SQLLastAttemptMetrics, SQLMetric, SQLMetrics}
 import org.apache.spark.util.ArrayImplicits._
 
 /**
@@ -44,6 +47,20 @@ case class BatchScanExec(
 
   @transient lazy val batch: Batch = if (scan == null) null else scan.toBatch
 
+  override protected lazy val sparkMetrics: Map[String, SQLMetric] = {
+    val name = "number of output rows"
+    val metric = table match {
+      // Use SLAM for the scan-output count when this scan reads on behalf of a row-level DELETE,
+      // so that the driver-side derivation `numDeletedRows = numScannedRows - numCopiedRows` in
+      // `ReplaceDataExec.getWriteSummary` stays correct under stage retries.
+      case rlot: RowLevelOperationTable if rlot.operation.command() == DELETE =>
+        SQLLastAttemptMetrics.createMetric(sparkContext, name)
+      case _ =>
+        SQLMetrics.createMetric(sparkContext, name)
+    }
+    Map("numOutputRows" -> metric)
+  }
+
   // TODO: unify the equal/hashCode implementation for all data source v2 query plans.
   override def equals(other: Any): Boolean = other match {
     case other: BatchScanExec =>
@@ -129,7 +146,7 @@ case class BatchScanExec(
       new DataSourceRDD(
         sparkContext, filteredPartitions, readerFactory, supportsColumnar, customMetrics)
     }
-    postDriverMetrics()
+    postDriverMetrics(scan.reportDriverMetrics())
     rdd
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala
index e9e5f0f3175cb..f4e5db1536178 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala
@@ -65,7 +65,7 @@ case class ContinuousScanExec(
       schema,
       readerFactory,
       customMetrics)
-    postDriverMetrics()
+    postDriverMetrics(scan.reportDriverMetrics())
     inputRDD
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala
index 02197a76aa1b8..95edbba62dcb0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.datasources.v2
 
 import scala.jdk.CollectionConverters.MapHasAsJava
+import scala.util.control.NonFatal
 
 import org.apache.spark.internal.LogKeys.NAMESPACE
 import org.apache.spark.sql.catalyst.InternalRow
@@ -46,6 +47,19 @@ case class CreateNamespaceExec(
       case _: NamespaceAlreadyExistsException if ifNotExists =>
         logWarning(log"Namespace ${MDC(NAMESPACE, namespace.quoted)} was created concurrently. " +
           log"Ignoring.")
+      case NonFatal(e) if ifNotExists =>
+        // Some catalogs validate the request (e.g. ACLs, properties) before checking existence,
+        // so creating a pre-existing namespace can surface errors unrelated to the "already
+        // exists" condition the caller intends to ignore under IF NOT EXISTS. If the namespace
+        // really does exist, treat the operation as a no-op; otherwise propagate the original
+        // error.
+        val exists = try catalog.namespaceExists(ns) catch { case NonFatal(_) => false }
+        if (exists) {
+          logWarning(log"Namespace ${MDC(NAMESPACE, namespace.quoted)} already exists; " +
+            log"swallowing underlying error under IF NOT EXISTS.", e)
+        } else {
+          throw e
+        }
     }
 
     Seq.empty
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2MetricViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2MetricViewExec.scala
new file mode 100644
index 0000000000000..fb27e9feaa0b1
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2MetricViewExec.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.catalyst.CurrentUserContext
+import org.apache.spark.sql.catalyst.analysis.{SchemaUnsupported, ViewSchemaMode}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog.{DependencyList, Identifier, TableSummary, ViewCatalog}
+
+/**
+ * Physical plan node for `CREATE VIEW ... WITH METRICS` on a v2 [[ViewCatalog]]. Inherits the
+ * shared CREATE-side `run()` (viewExists short-circuit, OR REPLACE, cross-type collision
+ * decoding) from [[V2CreateViewPreparation]]; only supplies the metric-view-specific bits
+ * (no collation, schema-mode UNSUPPORTED, typed view dependencies, `PROP_TABLE_TYPE =
+ * METRIC_VIEW`) via the [[V2ViewPreparation]] hooks.
+ *
+ * Routed by [[DataSourceV2Strategy]] from
+ * [[org.apache.spark.sql.metricview.logical.CreateMetricView]] when the resolved catalog
+ * is a non-session v2 catalog.
+ */
+case class CreateV2MetricViewExec(
+    catalog: ViewCatalog,
+    identifier: Identifier,
+    userSpecifiedColumns: Seq[(String, Option[String])],
+    comment: Option[String],
+    userProperties: Map[String, String],
+    originalText: String,
+    query: LogicalPlan,
+    allowExisting: Boolean,
+    replace: Boolean,
+    deps: Option[DependencyList]) extends V2CreateViewPreparation {
+
+  // Metric views don't carry a default-collation override.
+  override def collation: Option[String] = None
+
+  // CREATE stamps the current user, matching the v1 metric-view path (which goes through
+  // ViewHelper.prepareTable -> CatalogTable.owner default) and CreateV2ViewExec.
+  override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser)
+
+  // Metric views always have schema-mode UNSUPPORTED (mirroring the v1 path which passes
+  // SchemaUnsupported into ViewHelper.prepareTable).
+  override def viewSchemaMode: ViewSchemaMode = SchemaUnsupported
+
+  override protected def viewDependencies: Option[DependencyList] = deps
+
+  override protected def tableType: Option[String] =
+    Some(TableSummary.METRIC_VIEW_TABLE_TYPE)
+
+  // The analyzer attaches `metric_view.type` / `metric_view.expr` keys to each output
+  // attribute's metadata; `aliasPlan`'s default re-projection drops them when the user
+  // supplies a column-rename clause. Mirror v1 `ViewHelper.prepareTable(isMetricView = true)`
+  // by retaining metadata across the rename.
+  override protected def retainColumnMetadata: Boolean = true
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala
new file mode 100644
index 0000000000000..4e10c7d3ab284
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow}
+import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaEvolution, ViewAlreadyExistsException, ViewSchemaMode}
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
+import org.apache.spark.sql.connector.catalog.{DependencyList, Identifier, TableCatalog, ViewCatalog, ViewInfo}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper}
+import org.apache.spark.sql.util.SchemaUtils
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * Shared validation + ViewInfo construction for v2 CREATE VIEW / ALTER VIEW execs.
+ *
+ * Mirrors the persistent-view portion of v1 [[ViewHelper.prepareTable]] + the execution-time
+ * checks in [[org.apache.spark.sql.execution.command.CreateViewCommand.run]]. Post-analysis
+ * checks for temp-object references and auto-generated aliases run once for both v1 and v2 in
+ * [[org.apache.spark.sql.execution.command.CheckViewReferences]].
+ */
+private[v2] trait V2ViewPreparation extends LeafV2CommandExec {
+  def catalog: ViewCatalog
+  def identifier: Identifier
+  def userSpecifiedColumns: Seq[(String, Option[String])]
+  def comment: Option[String]
+  def collation: Option[String]
+  def owner: Option[String]
+  def userProperties: Map[String, String]
+  def originalText: String
+  def query: LogicalPlan
+  def viewSchemaMode: ViewSchemaMode
+
+  // Full multi-part identifier used for error rendering. Built once so we can avoid routing
+  // through the lossy v1 `TableIdentifier` for multi-level-namespace v2 catalogs.
+  protected lazy val fullNameParts: Seq[String] =
+    (catalog.name() +: identifier.asMultipartIdentifier).toSeq
+
+  /** Optional structured dependency list to stamp on the built `ViewInfo`. */
+  protected def viewDependencies: Option[DependencyList] = None
+
+  /** Optional view sub-kind to stamp on `PROP_TABLE_TYPE`; defaults to `VIEW` when `None`. */
+  protected def tableType: Option[String] = None
+
+  /**
+   * Whether `aliasPlan` should preserve any column metadata the analyzer attached to the
+   * source plan when re-aliasing user-specified column names. Plain views default to `false`
+   * (matches v1 `CreateViewCommand`); metric views override to `true` so the analyzer-injected
+   * `metric_view.type` / `metric_view.expr` keys survive a `CREATE VIEW <ident>(c1, c2, ...)`
+   * column rename (matches v1 `ViewHelper.prepareTable(isMetricView = true)`).
+   */
+  protected def retainColumnMetadata: Boolean = false
+
+  override def output: Seq[Attribute] = Seq.empty
+
+  protected def buildViewInfo(): ViewInfo = {
+    import ViewHelper._
+
+    if (userSpecifiedColumns.nonEmpty) {
+      if (userSpecifiedColumns.length > query.output.length) {
+        throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError(
+          fullNameParts, userSpecifiedColumns.map(_._1), query)
+      } else if (userSpecifiedColumns.length < query.output.length) {
+        throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError(
+          fullNameParts, userSpecifiedColumns.map(_._1), query)
+      }
+      if (viewSchemaMode == SchemaEvolution) {
+        throw SparkException.internalError(
+          "View with user column list has viewSchemaMode EVOLUTION")
+      }
+    }
+
+    SchemaUtils.checkIndeterminateCollationInSchema(query.schema)
+
+    val aliasedSchema = CharVarcharUtils.getRawSchema(
+      aliasPlan(session, query, userSpecifiedColumns, retainMetadata = retainColumnMetadata)
+        .schema,
+      session.sessionState.conf)
+    SchemaUtils.checkColumnNameDuplication(
+      aliasedSchema.fieldNames.toImmutableArraySeq, session.sessionState.conf.resolver)
+
+    val manager = session.sessionState.catalogManager
+    val queryColumnNames = if (viewSchemaMode == SchemaEvolution) {
+      Array.empty[String]
+    } else {
+      query.output.map(_.name).toArray
+    }
+
+    val builder = new ViewInfo.Builder()
+      .withSchema(aliasedSchema)
+      .withProperties(userProperties.asJava)
+      .withQueryText(originalText)
+      .withCurrentCatalog(manager.currentCatalog.name)
+      .withCurrentNamespace(manager.currentNamespace)
+      .withSqlConfigs(sqlConfigsToProps(session.sessionState.conf, "").asJava)
+      .withSchemaMode(viewSchemaMode.toString)
+      .withQueryColumnNames(queryColumnNames)
+    // CREATE stamps the current user into PROP_OWNER (matching v2 CREATE TABLE via
+    // CatalogV2Util.withDefaultOwnership and v1 CREATE VIEW via CatalogTable.owner's default);
+    // ALTER preserves the existing view's owner (v1-parity with AlterViewAsCommand's
+    // viewMeta.copy). Both cases are expressed via the `owner` hook provided by the subclass.
+    owner.foreach(builder.withOwner)
+    comment.foreach(builder.withComment)
+    collation.foreach(builder.withCollation)
+    viewDependencies.foreach(builder.withViewDependencies)
+    tableType.foreach(builder.withTableType)
+    builder.build()
+  }
+
+  protected def viewAlreadyExists(): Throwable =
+    QueryCompilationErrors.viewAlreadyExistsError(fullNameParts)
+}
+
+/**
+ * Shared CREATE-side `run()` for v2 view-create execs. Adds the `IF NOT EXISTS` short-circuit
+ * via [[ViewCatalog#viewExists]], dispatches `OR REPLACE` to
+ * [[ViewCatalog#createOrReplaceView]] vs. plain CREATE to [[ViewCatalog#createView]], and
+ * decodes `ViewAlreadyExistsException` into the dedicated cross-type collision error when a
+ * non-view table sits at the ident in a mixed catalog. Subclasses supply only the
+ * view-shape-specific fields (`allowExisting`, `replace`, plus the [[V2ViewPreparation]] hooks
+ * such as `viewDependencies` / `tableType`) and inherit `run()` unchanged.
+ */
+private[v2] trait V2CreateViewPreparation extends V2ViewPreparation {
+  def allowExisting: Boolean
+  def replace: Boolean
+
+  override final protected def run(): Seq[InternalRow] = {
+    // CREATE VIEW IF NOT EXISTS: short-circuit before `buildViewInfo` if a view already sits
+    // at the ident -- avoids `aliasPlan` / config capture for the common no-op case (matches
+    // v1 `CreateViewCommand.run`). The mixed-catalog "table at ident" no-op is handled in the
+    // catch block below; that case is rare enough that paying for `buildViewInfo` is fine.
+    if (allowExisting && catalog.viewExists(identifier)) return Seq.empty
+
+    val info = buildViewInfo()
+    try {
+      if (replace) {
+        CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier))
+        catalog.createOrReplaceView(identifier, info)
+      } else {
+        catalog.createView(identifier, info)
+      }
+    } catch {
+      case _: ViewAlreadyExistsException =>
+        // Catalog refused: something already occupies the ident. Decode whether it's a table
+        // (cross-type collision) or a view (race for plain CREATE / OR REPLACE), and emit the
+        // precise error -- or no-op for IF NOT EXISTS.
+        val isTable = catalog match {
+          case tc: TableCatalog => tc.tableExists(identifier)
+          case _ => false
+        }
+        if (isTable) {
+          if (!allowExisting) {
+            throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError(
+              fullNameParts, replace)
+          }
+          // CREATE VIEW IF NOT EXISTS over a table is a no-op (v1 parity).
+        } else if (!allowExisting) {
+          throw viewAlreadyExists()
+        }
+        // else: a view appeared between our viewExists probe and createView; IF NOT EXISTS
+        // semantics make this a no-op.
+    }
+    Seq.empty
+  }
+}
+
+/**
+ * Physical plan node for CREATE VIEW on a v2 [[ViewCatalog]]. Inherits the create-side
+ * `run()` (viewExists short-circuit + OR REPLACE + cross-type decoding) from
+ * [[V2CreateViewPreparation]]; only supplies the case-class fields and stamps the current
+ * user as owner.
+ */
+case class CreateV2ViewExec(
+    catalog: ViewCatalog,
+    identifier: Identifier,
+    userSpecifiedColumns: Seq[(String, Option[String])],
+    comment: Option[String],
+    collation: Option[String],
+    userProperties: Map[String, String],
+    originalText: String,
+    query: LogicalPlan,
+    allowExisting: Boolean,
+    replace: Boolean,
+    viewSchemaMode: ViewSchemaMode) extends V2CreateViewPreparation {
+
+  override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser)
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala
index 0bec918039775..f00d8b9b82cb4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala
@@ -24,23 +24,22 @@ import org.apache.spark.sql.catalyst.plans.physical
 import org.apache.spark.sql.catalyst.plans.physical.KeyedPartitioning
 import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, PartitionReaderFactory, Scan}
-import org.apache.spark.sql.execution.{ExplainUtils, LeafExecNode, SafeForKWayMerge, SQLExecution}
-import org.apache.spark.sql.execution.metric.SQLMetrics
+import org.apache.spark.sql.execution.{ExplainUtils, LeafExecNode, SafeForKWayMerge}
+import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.sql.internal.connector.SupportsMetadata
 import org.apache.spark.sql.vectorized.ColumnarBatch
-import org.apache.spark.util.ArrayImplicits._
 import org.apache.spark.util.Utils
 
-trait DataSourceV2ScanExecBase extends LeafExecNode with SafeForKWayMerge {
+trait DataSourceV2ScanExecBase
+  extends LeafExecNode
+  with SafeForKWayMerge
+  with SupportsCustomDriverMetrics {
 
-  lazy val customMetrics = scan.supportedCustomMetrics().map { customMetric =>
-    customMetric.name() -> SQLMetrics.createV2CustomMetric(sparkContext, customMetric)
-  }.toMap
+  override lazy val customMetrics: Map[String, SQLMetric] =
+    createCustomMetrics(scan.supportedCustomMetrics())
 
-  override lazy val metrics = {
-    Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) ++
-      customMetrics
-  }
+  override protected lazy val sparkMetrics: Map[String, SQLMetric] =
+    Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
 
   def scan: Scan
 
@@ -145,18 +144,6 @@ trait DataSourceV2ScanExecBase extends LeafExecNode with SafeForKWayMerge {
     }
   }
 
-  protected def postDriverMetrics(): Unit = {
-    val driveSQLMetrics = scan.reportDriverMetrics().map(customTaskMetric => {
-      val metric = metrics(customTaskMetric.name())
-      metric.set(customTaskMetric.value())
-      metric
-    })
-
-    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
-    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId,
-      driveSQLMetrics.toImmutableArraySeq)
-  }
-
   override def doExecuteColumnar(): RDD[ColumnarBatch] = {
     val numOutputRows = longMetric("numOutputRows")
     inputRDD.asInstanceOf[RDD[ColumnarBatch]].map { b =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
index 6730673cab025..4fd7d993cc3d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
@@ -24,7 +24,7 @@ import org.apache.hadoop.fs.Path
 import org.apache.spark.{SparkException, SparkIllegalArgumentException}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.LogKeys.EXPR
-import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, ResolvedNamespace, ResolvedPartitionSpec, ResolvedPersistentView, ResolvedTable, ResolvedTempView}
+import org.apache.spark.sql.catalyst.analysis.{NamedRelation, ResolvedIdentifier, ResolvedNamespace, ResolvedPartitionSpec, ResolvedPersistentView, ResolvedTable, ResolvedTempView}
 import org.apache.spark.sql.catalyst.catalog.CatalogUtils
 import org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions.{And, Attribute, DynamicPruning, Expression, NamedExpression, Not, Or, PredicateHelper, SubqueryExpression}
@@ -32,23 +32,24 @@ import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY
-import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder}
+import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder}
 import org.apache.spark.sql.classic.SparkSession
-import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Dependency, DependencyList, Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableSummary, TruncatableTable, V1Table, V1ViewInfo, ViewCatalog}
 import org.apache.spark.sql.connector.catalog.TableChange
 import org.apache.spark.sql.connector.catalog.index.SupportsIndex
 import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue}
 import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => V2Not, Or => V2Or, Predicate}
 import org.apache.spark.sql.connector.read.LocalScan
 import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream, SupportsRealTimeMode}
-import org.apache.spark.sql.connector.write.V1Write
+import org.apache.spark.sql.connector.write.{V1Write, Write}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.execution.{FilterExec, InSubqueryExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, ScalarSubquery => ExecScalarSubquery, SparkPlan, SparkStrategy => Strategy}
-import org.apache.spark.sql.execution.command.CommandUtils
+import org.apache.spark.sql.execution.command.{CommandUtils, MetricViewHelper}
 import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, LogicalRelationWithTable, PushableColumnAndNestedColumn}
 import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH
+import org.apache.spark.sql.metricview.logical.CreateMetricView
 import org.apache.spark.sql.sources.{BaseRelation, TableScan}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.ArrayImplicits._
@@ -103,6 +104,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
       location, session.sharedState.hadoopConf)
   }
 
+  // Strategy cases that target v2 views read `ResolvedPersistentView.info` directly. For
+  // session-catalog (v1) views the payload is a `V1ViewInfo` wrapping the original
+  // `CatalogTable`; v2 catalogs supply a regular `ViewInfo` from the catalog.
+  // `ResolveSessionCatalog` rewrites session-catalog views to v1 commands before this strategy
+  // fires, so v2 cases that don't expect a `V1ViewInfo` won't see one.
+
   private def qualifyLocInTableSpec(tableSpec: TableSpec): TableSpec = {
     val newLoc = tableSpec.location.map { loc =>
       val locationUri = CatalogUtils.stringToURI(loc)
@@ -259,13 +266,18 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
 
     // CREATE TABLE ... LIKE ... for a v2 catalog target.
     // Source is an already-resolved Table object; no extra catalog round-trip is needed.
-    // Views are wrapped in V1Table so the exec can extract schema and provider uniformly.
+    // Views are wrapped in V1Table so the exec can extract schema and provider uniformly --
+    // session-catalog (v1) views unwrap to their original `CatalogTable`; non-session v2
+    // views go through `V1Table.toCatalogTable` to synthesize an equivalent `CatalogTable`
+    // from the resolved `ViewInfo`.
     case CreateTableLike(
         ResolvedIdentifier(catalog, ident), source,
         locationStr, provider, serdeInfo, properties, ifNotExists) =>
       val table = source match {
         case ResolvedTable(_, _, t, _) => t
-        case ResolvedPersistentView(_, _, meta) => V1Table(meta)
+        case ResolvedPersistentView(_, _, info: V1ViewInfo) => V1Table(info.v1Table)
+        case rpv @ ResolvedPersistentView(viewCatalog, viewIdent, _) =>
+          V1Table(V1Table.toCatalogTable(viewCatalog, viewIdent, rpv.info))
         case ResolvedTempView(_, meta) => V1Table(meta)
       }
       val location = locationStr.map { loc =>
@@ -301,6 +313,162 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
             qualifyLocInTableSpec(tableSpec), orCreate = orCreate, invalidateCache) :: Nil
       }
 
+    // CheckViewReferences guarantees the catalog is a ViewCatalog by the time these strategy
+    // cases fire (it throws MISSING_CATALOG_ABILITY.VIEWS otherwise).
+    case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment,
+        collation, properties, originalText, child, allowExisting, replace, viewSchemaMode,
+        _, _) =>
+      val sqlText = originalText.getOrElse {
+        throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError()
+      }
+      CreateV2ViewExec(catalog.asInstanceOf[ViewCatalog], ident, userSpecifiedColumns, comment,
+        collation, properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil
+
+    // CREATE VIEW ... WITH METRICS on a non-session v2 catalog. Routes the metric-view path
+    // through `CreateV2MetricViewExec`, which extends `V2ViewPreparation` to share the
+    // `IF NOT EXISTS` short-circuit, `OR REPLACE`, and cross-type-collision decoding with
+    // `CreateV2ViewExec`. Session-catalog dispatch happens earlier in `ResolveSessionCatalog`,
+    // which rewrites `CreateMetricView` (the parser's v1/v2-agnostic logical plan) to
+    // `CreateMetricViewCommand` for v1 execution.
+    case CreateMetricView(
+        ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, properties,
+        originalText, allowExisting, replace) if !CatalogV2Util.isSessionCatalog(catalog) =>
+      val viewCatalog = catalog match {
+        case vc: ViewCatalog => vc
+        case _ => throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog)
+      }
+      // Parse + analyze the YAML body here (during planning). This mirrors the v1 path's
+      // late analysis in `CreateMetricViewCommand.run` -- the metric-view source plan is not
+      // a SQL string, so it can't ride along as a regular `query` `LogicalPlan` field on the
+      // logical command the way `CreateView` does. Pass the full multi-part name so v2 metric
+      // views with multi-level-namespace targets analyze correctly (`asTableIdentifier` would
+      // throw `requiresSinglePartNamespaceError` for namespace arity > 1).
+      val nameParts = (catalog.name() +: ident.namespace().toIndexedSeq) :+ ident.name()
+      val (analyzed, metricView) = MetricViewHelper.analyzeMetricViewText(
+        session, nameParts, originalText)
+      val mergedProps = properties ++ metricView.getProperties
+      val depParts = MetricViewHelper.collectTableDependencies(analyzed)
+      // Always emit a `Some(DependencyList)` for metric views (even when `depParts` is empty,
+      // e.g. `SQLSource("SELECT 1 AS x")`): per `DependencyList`'s contract, `null` means
+      // "no dependency list was supplied" while an empty list means "supplied but the
+      // object has none". Metric-view CREATE always *computes* deps, so the right empty
+      // representation is `Some(empty list)`, not `None`.
+      val sparkDeps: Array[Dependency] =
+        depParts.map(parts => Dependency.table(parts.toArray): Dependency).toArray
+      val deps = Some(DependencyList.of(sparkDeps))
+      CreateV2MetricViewExec(viewCatalog, ident, userSpecifiedColumns, comment, mergedProps,
+        originalText, analyzed, allowExisting, replace, deps) :: Nil
+
+    case AlterViewAs(rpv @ ResolvedPersistentView(catalog, ident, _),
+        originalText, query, _, _) =>
+      AlterV2ViewExec(catalog.asInstanceOf[ViewCatalog], ident, rpv.info,
+        originalText, query) :: Nil
+
+    // View DDL / inspection on a non-session v2 catalog that the v1 rewrite in
+    // `ResolveSessionCatalog` can't handle (its `ResolvedViewIdentifier` matcher is gated on
+    // `isSessionCatalog`). Routed to dedicated v2 execs that read the typed `ViewInfo`
+    // resolved at analysis time directly from `ResolvedPersistentView.info` -- no re-loading
+    // at exec time.
+    case SetViewProperties(rpv @ ResolvedPersistentView(catalog, ident, _), props) =>
+      AlterV2ViewSetPropertiesExec(
+        catalog.asInstanceOf[ViewCatalog], ident, rpv.info, props) :: Nil
+
+    case UnsetViewProperties(rpv @ ResolvedPersistentView(catalog, ident, _), keys, _) =>
+      AlterV2ViewUnsetPropertiesExec(
+        catalog.asInstanceOf[ViewCatalog], ident, rpv.info, keys) :: Nil
+
+    case AlterViewSchemaBinding(rpv @ ResolvedPersistentView(catalog, ident, _), schemaMode) =>
+      AlterV2ViewSchemaBindingExec(
+        catalog.asInstanceOf[ViewCatalog], ident, rpv.info, schemaMode) :: Nil
+
+    case RenameTable(ResolvedPersistentView(catalog, ident, _), newName, isView) =>
+      // Reject `ALTER TABLE <view> RENAME TO ...` -- the syntax says TABLE, but the resolved
+      // child is a view. Matches the v1 runtime check in `DDLUtils.verifyAlterTableType`.
+      if (!isView) {
+        throw QueryCompilationErrors.cannotAlterViewWithAlterTableError(ident.name())
+      }
+      RenameV2ViewExec(
+        catalog.asInstanceOf[ViewCatalog], ident, newName.asIdentifier) :: Nil
+
+    case ShowCreateTable(rpv @ ResolvedPersistentView(catalog, ident, _), _, _)
+        if rpv.info.properties.get(TableCatalog.PROP_TABLE_TYPE) ==
+          TableSummary.METRIC_VIEW_TABLE_TYPE =>
+      // SHOW CREATE TABLE on a metric view is explicitly unsupported: `ShowCreateV2ViewExec`
+      // would emit a plain `CREATE VIEW <ident> AS <yaml>`, which is not a round-trippable
+      // metric-view DDL form (the right form is `CREATE VIEW <ident> WITH METRICS LANGUAGE
+      // YAML AS $$ <yaml> $$`). Reject up front with the same dedicated error class the v1
+      // path uses (`UNSUPPORTED_SHOW_CREATE_TABLE.ON_METRIC_VIEW`) so users see the same
+      // actionable message regardless of catalog kind.
+      val quoted = (catalog.name() +: ident.asMultipartIdentifier)
+        .map(quoteIfNeeded).mkString(".")
+      throw QueryCompilationErrors.showCreateTableNotSupportedOnMetricViewError(quoted)
+
+    case ShowCreateTable(rpv @ ResolvedPersistentView(catalog, ident, _), _, output) =>
+      val quoted = (catalog.name() +: ident.asMultipartIdentifier).map(quoteIfNeeded).mkString(".")
+      ShowCreateV2ViewExec(output, quoted, rpv.info) :: Nil
+
+    case ShowTableProperties(rpv @ ResolvedPersistentView(catalog, ident, _),
+        propertyKey, output) =>
+      val quoted = (catalog.name() +: ident.asMultipartIdentifier).map(quoteIfNeeded).mkString(".")
+      ShowV2ViewPropertiesExec(output, quoted, rpv.info, propertyKey) :: Nil
+
+    case ShowColumns(rpv @ ResolvedPersistentView(_, ident, _), ns, output) =>
+      // If `SHOW COLUMNS IN <view> FROM <ns>` was written with both the view's namespace and
+      // an explicit `FROM <ns>`, validate they agree -- mirrors the v1 rewrite in
+      // `ResolveSessionCatalog`. For multi-level v2 namespaces we compare the full namespace
+      // sequence (case-insensitively) rather than v1's single-part `database` check.
+      ns.foreach { nsSeq =>
+        val resolver = session.sessionState.conf.resolver
+        val viewNs = ident.namespace().toSeq
+        val mismatch = viewNs.length != nsSeq.length ||
+          viewNs.zip(nsSeq).exists { case (a, b) => !resolver(a, b) }
+        if (mismatch) {
+          throw QueryCompilationErrors.showColumnsWithConflictNamespacesError(nsSeq, viewNs)
+        }
+      }
+      ShowV2ViewColumnsExec(output, rpv.info) :: Nil
+
+    case DescribeRelation(rpv @ ResolvedPersistentView(catalog, ident, _), isExtended, output) =>
+      DescribeV2ViewExec(output, catalog.name(), ident, rpv.info, isExtended) :: Nil
+
+    case DescribeColumn(rpv @ ResolvedPersistentView(_, _, _), column, isExtended, output) =>
+      // `ResolvedPersistentView.output` exposes the view's schema, so `ResolveReferences`
+      // resolves the column against it -- meaning we typically receive an `Attribute` here.
+      // Accept the legacy `UnresolvedAttribute` form too. The unwrap logic is shared with the
+      // v1 rewrite for session-catalog views in `ResolveSessionCatalog`.
+      DescribeV2ViewColumnExec(
+        output, rpv.info, DescribeColumn.extractColumnNameParts(column), isExtended) :: Nil
+
+    // Plans that resolve through `UnresolvedTableOrView` reach here with a
+    // `ResolvedPersistentView` child for non-session v2 views (the v1 rewrite in
+    // `ResolveSessionCatalog` no longer matches them because `ResolvedViewIdentifier` is gated
+    // on `isSessionCatalog`). Pin each with `UNSUPPORTED_FEATURE.TABLE_OPERATION` so users get
+    // a clean `AnalysisException` instead of a generic "No plan for ..." assertion from the
+    // planner. Tracked for follow-up real handlers in SPARK-52729.
+    case RefreshTable(ResolvedPersistentView(catalog, ident, _)) =>
+      throw QueryCompilationErrors.unsupportedTableOperationError(
+        catalog, ident, "REFRESH TABLE")
+
+    case AnalyzeTable(ResolvedPersistentView(catalog, ident, _), _, _) =>
+      throw QueryCompilationErrors.unsupportedTableOperationError(
+        catalog, ident, "ANALYZE TABLE")
+
+    case AnalyzeColumn(ResolvedPersistentView(catalog, ident, _), _, _) =>
+      throw QueryCompilationErrors.unsupportedTableOperationError(
+        catalog, ident, "ANALYZE TABLE ... FOR COLUMNS")
+
+    // SHOW PARTITIONS on a view is already rejected during analysis: the parser uses
+    // `UnresolvedTable` (not `UnresolvedTableOrView`), so `CheckAnalysis` surfaces
+    // `EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE` before planning. No strategy case needed.
+
+    // DROP VIEW on a non-session ViewCatalog. The v1 rewrite in `ResolveSessionCatalog` skips
+    // ViewCatalog catalogs, so they fall through here. `DropViewExec` calls
+    // `ViewCatalog.dropView` and surfaces `EXPECT_VIEW_NOT_TABLE` if the identifier resolves to
+    // a table in a mixed catalog.
+    case DropView(r @ ResolvedIdentifier(catalog: ViewCatalog, ident), ifExists) =>
+      val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r)
+      DropViewExec(catalog, ident, ifExists, invalidateFunc) :: Nil
+
     case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident),
         parts, query, tableSpec: TableSpec, options, orCreate, true) =>
       catalog match {
@@ -326,8 +494,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
             invalidateCache) :: Nil
       }
 
-    case AppendData(r @ ExtractV2Table(v1: SupportsWrite), _, _,
-        _, _, Some(write), analyzedQuery) if v1.supports(TableCapability.V1_BATCH_WRITE) =>
+    case AppendWrite(r @ ExtractV2Table(v1: SupportsWrite), Some(write), analyzedQuery)
+        if v1.supports(TableCapability.V1_BATCH_WRITE) =>
       write match {
         case v1Write: V1Write =>
           assert(analyzedQuery.isDefined)
@@ -338,7 +506,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
       }
 
     case AppendData(r: DataSourceV2Relation, query, _, _, _, Some(write), _) =>
-      AppendDataExec(planLater(query), refreshCache(r), write) :: Nil
+      AppendDataExec(planLater(query), refreshCache(r), write, r.name) :: Nil
+
+    case InsertOnlyMerge(r: DataSourceV2Relation, query, Some(write), _) =>
+      InsertOnlyMergeExec(planLater(query), refreshCache(r), write, r.name) :: Nil
 
     case OverwriteByExpression(r @ ExtractV2Table(v1: SupportsWrite), _, _,
         _, _, _, Some(write), analyzedQuery) if v1.supports(TableCapability.V1_BATCH_WRITE) =>
@@ -353,10 +524,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
 
     case OverwriteByExpression(
         r: DataSourceV2Relation, _, query, _, _, _, Some(write), _) =>
-      OverwriteByExpressionExec(planLater(query), refreshCache(r), write) :: Nil
+      OverwriteByExpressionExec(planLater(query), refreshCache(r), write, r.name) :: Nil
 
     case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, _, _, _, Some(write)) =>
-      OverwritePartitionsDynamicExec(planLater(query), refreshCache(r), write) :: Nil
+      OverwritePartitionsDynamicExec(planLater(query), refreshCache(r), write, r.name) :: Nil
 
     case DeleteFromTableWithFilters(r: DataSourceV2Relation, filters) =>
       DeleteFromTableExec(r.table.asDeletable, filters.toArray, refreshCache(r)) :: Nil
@@ -402,16 +573,18 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
         refreshCache(r), // use the original relation to refresh the cache
         projections,
         write,
-        rd.operation.command) :: Nil
+        rd.operation.command,
+        r.name) :: Nil
 
     case wd @ WriteDelta(_: DataSourceV2Relation, _, query, r: DataSourceV2Relation, projections,
-        Some(write)) =>
+        _, Some(write)) =>
       WriteDeltaExec(
         planLater(query),
         refreshCache(r), // use the original relation to refresh the cache
         projections,
         write,
-        wd.operation.command) :: Nil
+        wd.operation.command,
+        r.name) :: Nil
 
     case MergeRows(isSourceRowPresent, isTargetRowPresent, matchedInstructions,
         notMatchedInstructions, notMatchedBySourceInstructions, checkCardinality, output, child) =>
@@ -426,7 +599,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
       DescribeNamespaceExec(output, catalog.asNamespaceCatalog, ns, extended) :: Nil
 
     case DescribeRelation(r: ResolvedTable, isExtended, output) =>
-      DescribeTableExec(output, r.table, isExtended) :: Nil
+      DescribeTableExec(output, r.catalog.name(), r.identifier, r.table, isExtended) :: Nil
 
     case DescribeTablePartition(r: ResolvedTable, part, isExtended, output) =>
       DescribeTablePartitionExec(output, r.table.asPartitionable, r.identifier,
@@ -443,7 +616,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
 
     case DropTable(r: ResolvedIdentifier, ifExists, purge) =>
       val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r)
-      DropTableExec(r.catalog.asTableCatalog, r.identifier, ifExists, purge, invalidateFunc) :: Nil
+      DropTableExec(
+        r.catalog.asTableCatalog, r.identifier, ifExists, purge, invalidateFunc) :: Nil
 
     case _: NoopCommand =>
       LocalTableScanExec(Nil, Nil, None) :: Nil
@@ -493,6 +667,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
     case ShowTables(ResolvedNamespace(catalog, ns, _), pattern, output) =>
       ShowTablesExec(output, catalog.asTableCatalog, ns, pattern) :: Nil
 
+    // SHOW VIEWS on a v2 ViewCatalog. `ResolveSessionCatalog` rewrites the SHOW VIEWS plan to
+    // v1 `ShowViewsCommand` only when the catalog is NOT a `ViewCatalog`; non-`ViewCatalog`
+    // catalogs (session or not) are rejected with `MISSING_CATALOG_ABILITY.VIEWS` there. So
+    // this case sees `ViewCatalog` catalogs (typically non-session, since the default
+    // `V2SessionCatalog` is not a `ViewCatalog`; a session-catalog override that mixes in
+    // `ViewCatalog` would also reach here).
+    case ShowViews(ResolvedNamespace(catalog: ViewCatalog, ns, _), pattern, output) =>
+      ShowViewsExec(output, catalog, ns, pattern) :: Nil
+
     case ShowTablesExtended(
         ResolvedNamespace(catalog, ns, _),
         pattern,
@@ -598,7 +781,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
 
     case r: CacheTableAsSelect =>
       CacheTableAsSelectExec(
-        r.tempViewName, r.plan, r.originalText, r.isLazy, r.options, r.referredTempFunctions) :: Nil
+        r.tempViewNameString, r.plan, r.originalText, r.isLazy, r.options,
+        r.referredTempFunctions) :: Nil
 
     case r: UncacheTable =>
       def isTempView(table: LogicalPlan): Boolean = table match {
@@ -666,6 +850,20 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
   }
 }
 
+/**
+ * Pattern that matches either an [[AppendData]] or an [[InsertOnlyMerge]] and exposes the
+ * fields needed to plan the v1 batch-write fallback path.
+ */
+private object AppendWrite {
+  def unapply(
+      plan: LogicalPlan
+  ): Option[(NamedRelation, Option[Write], Option[LogicalPlan])] = plan match {
+    case a: AppendData => Some((a.table, a.write, a.analyzedQuery))
+    case m: InsertOnlyMerge => Some((m.table, m.write, m.analyzedQuery))
+    case _ => None
+  }
+}
+
 private[sql] object DataSourceV2Strategy extends Logging {
 
   private def translateLeafNodeFilterV2(predicate: Expression): Option[Predicate] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala
index 8d5ee6038e80f..ea61bca266600 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala
@@ -19,16 +19,34 @@ package org.apache.spark.sql.execution.datasources.v2
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.transactions.TransactionUtils
 import org.apache.spark.sql.connector.catalog.SupportsDeleteV2
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
 import org.apache.spark.sql.connector.expressions.filter.Predicate
+import org.apache.spark.sql.execution.metric.SQLMetric
 
 case class DeleteFromTableExec(
     table: SupportsDeleteV2,
     condition: Array[Predicate],
-    refreshCache: () => Unit) extends LeafV2CommandExec {
+    refreshCache: () => Unit,
+    transaction: Option[Transaction] = None)
+  extends LeafV2CommandExec
+  with TransactionalExec
+  with SupportsCustomDriverMetrics {
+
+  override lazy val customMetrics: Map[String, SQLMetric] =
+    createCustomMetrics(table.supportedCustomMetrics())
+
+  override def withTransaction(txn: Option[Transaction]): DeleteFromTableExec =
+    copy(transaction = txn)
 
   override protected def run(): Seq[InternalRow] = {
-    table.deleteWhere(condition)
+    try {
+      table.deleteWhere(condition)
+    } finally {
+      postDriverMetrics(table.reportDriverMetrics())
+    }
+    transaction.foreach(TransactionUtils.commit)
     refreshCache()
     Seq.empty
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index 51f906724b985..68f4620eaf0db 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -24,17 +24,153 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, ClusterBySpec}
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
 import org.apache.spark.sql.connector.expressions.{ClusterByTransform, IdentityTransform}
 import org.apache.spark.sql.connector.read.SupportsReportStatistics
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.util.ArrayImplicits._
 
+/**
+ * Catalog / Namespace / Database / <entity> row formatting shared by
+ * `DescribeTableExec.addTableDetails` and `DescribeV2ViewExec.run`. Hosting it in one place
+ * keeps the row layout (including the v1-compat `Database` row) as a single source of truth
+ * so the table and view paths can't drift.
+ */
+private[v2] trait DescribeIdentifierRows extends LeafV2CommandExec {
+  /**
+   * Append the structured identifier rows (`Catalog`, `Namespace`, `Database`,
+   * `<entityLabel>`) to `rows`. `entityLabel` is `"Table"` for a v2 table and `"View"` for a
+   * v2 view -- the only divergence between the two paths.
+   *
+   * Row shapes:
+   *  - `Catalog` carries the catalog plugin name (always present for v2).
+   *  - `Namespace` is the canonical multi-segment representation, joined with `.` and with
+   *    `quoteIfNeeded` applied per segment (so segments containing dots round-trip). Always
+   *    emitted; for an empty namespace (root-level entity) the value is the empty string,
+   *    so the row's presence stays uniform across v2 outputs.
+   *  - `Database` is always emitted for v1 compatibility. Its value is the trailing
+   *    namespace segment (so multi-segment namespaces still surface their leaf segment),
+   *    or the empty string when the namespace is the catalog root. Consumers that need
+   *    the full namespace should read `Namespace`; `Database` alone is not round-trip-safe
+   *    for multi-segment cases.
+   *  - `<entityLabel>` is the unqualified entity name from `Identifier.name()`.
+   */
+  protected def addIdentifierRows(
+      rows: ArrayBuffer[InternalRow],
+      catalogName: String,
+      identifier: Identifier,
+      entityLabel: String): Unit = {
+    rows += toCatalystRow("Catalog", catalogName, "")
+    rows += toCatalystRow("Namespace", identifier.namespace().quoted, "")
+    rows += toCatalystRow("Database", identifier.namespace().lastOption.getOrElse(""), "")
+    rows += toCatalystRow(entityLabel, identifier.name(), "")
+  }
+}
+
+/**
+ * Schema + partitioning + clustering row formatting shared by `DescribeTableExec.run()` (which
+ * uses it for the schema-row prefix) and `DescribeTablePartitionExec.run()` (which uses it as
+ * the entire pre-partition section). Mixing the helpers into a trait lets each exec invoke
+ * them directly off `this`, so the partition exec doesn't need to thread the table-only
+ * `catalogName` / `identifier` arguments that `DescribeTableExec` consumes for the EXTENDED
+ * `# Detailed Table Information` block.
+ *
+ * Kept orthogonal to [[DescribeIdentifierRows]] so `DescribeTablePartitionExec` (which only
+ * needs the schema/partitioning rows) doesn't inherit identifier-row helpers it never calls.
+ * `DescribeTableExec` mixes both traits in.
+ */
+private[v2] trait DescribeTableBaseRows extends LeafV2CommandExec {
+  def table: Table
+
+  /** A blank `("", "", "")` row used as a section separator in DESCRIBE output. */
+  protected def emptyRow(): InternalRow = toCatalystRow("", "", "")
+
+  /** Schema + partitioning + clustering rows, shared with DescribeTablePartitionExec. */
+  protected def addBaseDescription(rows: ArrayBuffer[InternalRow]): Unit = {
+    addSchema(rows)
+    addPartitioning(rows)
+    addClustering(rows)
+  }
+
+  private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
+    rows ++= table.columns().map{ column =>
+      toCatalystRow(
+        column.name, column.dataType.simpleString, column.comment)
+    }
+  }
+
+  private def addClusteringToRows(
+      clusterBySpec: ClusterBySpec,
+      rows: ArrayBuffer[InternalRow]): Unit = {
+    rows += toCatalystRow("# Clustering Information", "", "")
+    rows += toCatalystRow(s"# ${output.head.name}", output(1).name, output(2).name)
+    rows ++= clusterBySpec.columnNames.map { fieldNames =>
+      val schema = CatalogV2Util.v2ColumnsToStructType(table.columns())
+      val nestedField = schema.findNestedField(fieldNames.fieldNames.toIndexedSeq)
+      assert(nestedField.isDefined,
+        "The clustering column " +
+          s"${fieldNames.fieldNames.map(quoteIfNeeded).mkString(".")} " +
+          s"was not found in the table schema ${schema.catalogString}.")
+      nestedField.get
+    }.map { case (path, field) =>
+      toCatalystRow(
+        (path :+ field.name).map(quoteIfNeeded).mkString("."),
+        field.dataType.simpleString,
+        field.getComment().orNull)
+    }
+  }
+
+  private def addClustering(rows: ArrayBuffer[InternalRow]): Unit = {
+    ClusterBySpec.extractClusterBySpec(table.partitioning.toIndexedSeq).foreach { clusterBySpec =>
+      addClusteringToRows(clusterBySpec, rows)
+    }
+  }
+
+  private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
+    // Clustering columns are handled in addClustering().
+    val partitioning = table.partitioning
+      .filter(t => !t.isInstanceOf[ClusterByTransform])
+    if (partitioning.nonEmpty) {
+      val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform])
+      if (partitionColumnsOnly) {
+        rows += toCatalystRow("# Partition Information", "", "")
+        rows += toCatalystRow(s"# ${output(0).name}", output(1).name, output(2).name)
+        val schema = CatalogV2Util.v2ColumnsToStructType(table.columns())
+        rows ++= table.partitioning
+          .map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
+          .map { fieldNames =>
+            val nestedField = schema.findNestedField(fieldNames.toImmutableArraySeq)
+            if (nestedField.isEmpty) {
+              throw QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
+                fieldNames.toSeq,
+                schema)
+            }
+            nestedField.get
+          }.map { case (path, field) =>
+            toCatalystRow(
+              (path :+ field.name).map(quoteIfNeeded(_)).mkString("."),
+              field.dataType.simpleString,
+              field.getComment().orNull)
+          }
+      } else {
+        rows += emptyRow()
+        rows += toCatalystRow("# Partitioning", "", "")
+        rows ++= table.partitioning.zipWithIndex.map {
+          case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
+        }
+      }
+    }
+  }
+}
+
 case class DescribeTableExec(
     output: Seq[Attribute],
+    catalogName: String,
+    identifier: Identifier,
     table: Table,
-    isExtended: Boolean) extends LeafV2CommandExec {
+    isExtended: Boolean) extends DescribeTableBaseRows with DescribeIdentifierRows {
   override protected def run(): Seq[InternalRow] = {
     val rows = new ArrayBuffer[InternalRow]()
     addBaseDescription(rows)
@@ -48,17 +184,10 @@ case class DescribeTableExec(
     rows.toSeq
   }
 
-  /** Schema + partitioning + clustering rows, shared with DescribeTablePartitionExec. */
-  private[v2] def addBaseDescription(rows: ArrayBuffer[InternalRow]): Unit = {
-    addSchema(rows)
-    addPartitioning(rows)
-    addClustering(rows)
-  }
-
   private def addTableDetails(rows: ArrayBuffer[InternalRow]): Unit = {
     rows += emptyRow()
     rows += toCatalystRow("# Detailed Table Information", "", "")
-    rows += toCatalystRow("Name", table.name(), "")
+    addIdentifierRows(rows, catalogName, identifier, entityLabel = "Table")
 
     val tableType = if (table.properties().containsKey(TableCatalog.PROP_EXTERNAL)) {
       CatalogTableType.EXTERNAL.name
@@ -87,13 +216,6 @@ case class DescribeTableExec(
     }
   }
 
-  private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
-    rows ++= table.columns().map{ column =>
-      toCatalystRow(
-        column.name, column.dataType.simpleString, column.comment)
-    }
-  }
-
   private def addTableConstraints(rows: ArrayBuffer[InternalRow]): Unit = {
     if (table.constraints.nonEmpty) {
       rows += emptyRow()
@@ -117,33 +239,6 @@ case class DescribeTableExec(
     case _ =>
   }
 
-  private def addClusteringToRows(
-      clusterBySpec: ClusterBySpec,
-      rows: ArrayBuffer[InternalRow]): Unit = {
-    rows += toCatalystRow("# Clustering Information", "", "")
-    rows += toCatalystRow(s"# ${output.head.name}", output(1).name, output(2).name)
-    rows ++= clusterBySpec.columnNames.map { fieldNames =>
-      val schema = CatalogV2Util.v2ColumnsToStructType(table.columns())
-      val nestedField = schema.findNestedField(fieldNames.fieldNames.toIndexedSeq)
-      assert(nestedField.isDefined,
-        "The clustering column " +
-          s"${fieldNames.fieldNames.map(quoteIfNeeded).mkString(".")} " +
-          s"was not found in the table schema ${schema.catalogString}.")
-      nestedField.get
-    }.map { case (path, field) =>
-      toCatalystRow(
-        (path :+ field.name).map(quoteIfNeeded).mkString("."),
-        field.dataType.simpleString,
-        field.getComment().orNull)
-    }
-  }
-
-  private def addClustering(rows: ArrayBuffer[InternalRow]): Unit = {
-    ClusterBySpec.extractClusterBySpec(table.partitioning.toIndexedSeq).foreach { clusterBySpec =>
-      addClusteringToRows(clusterBySpec, rows)
-    }
-  }
-
   private def addTableStats(rows: ArrayBuffer[InternalRow]): Unit = table match {
     case read: SupportsRead =>
       read.newScanBuilder(CaseInsensitiveStringMap.empty()).build() match {
@@ -160,42 +255,4 @@ case class DescribeTableExec(
       }
     case _ =>
   }
-
-  private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
-    // Clustering columns are handled in addClustering().
-    val partitioning = table.partitioning
-      .filter(t => !t.isInstanceOf[ClusterByTransform])
-    if (partitioning.nonEmpty) {
-      val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform])
-      if (partitionColumnsOnly) {
-        rows += toCatalystRow("# Partition Information", "", "")
-        rows += toCatalystRow(s"# ${output(0).name}", output(1).name, output(2).name)
-        val schema = CatalogV2Util.v2ColumnsToStructType(table.columns())
-        rows ++= table.partitioning
-          .map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
-          .map { fieldNames =>
-            val nestedField = schema.findNestedField(fieldNames.toImmutableArraySeq)
-            if (nestedField.isEmpty) {
-              throw QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
-                fieldNames.toSeq,
-                schema)
-            }
-            nestedField.get
-          }.map { case (path, field) =>
-            toCatalystRow(
-              (path :+ field.name).map(quoteIfNeeded(_)).mkString("."),
-              field.dataType.simpleString,
-              field.getComment().orNull)
-          }
-      } else {
-        rows += emptyRow()
-        rows += toCatalystRow("# Partitioning", "", "")
-        rows ++= table.partitioning.zipWithIndex.map {
-          case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
-        }
-      }
-    }
-  }
-
-  private def emptyRow(): InternalRow = toCatalystRow("", "", "")
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTablePartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTablePartitionExec.scala
index 6a2c3c441f8fb..6006eface77bf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTablePartitionExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTablePartitionExec.scala
@@ -32,14 +32,16 @@ case class DescribeTablePartitionExec(
     table: SupportsPartitionManagement,
     tableIdent: Identifier,
     partSpec: ResolvedPartitionSpec,
-    isExtended: Boolean) extends LeafV2CommandExec {
+    isExtended: Boolean) extends DescribeTableBaseRows {
 
   override protected def run(): Seq[InternalRow] = {
     val partitionRow = validateAndGetPartition()
 
-    // Delegate schema + partitioning + clustering to DescribeTableExec.
+    // Schema + partitioning + clustering rows come from the shared `DescribeTableBaseRows`
+    // trait, which is mixed in by both this exec and `DescribeTableExec` so each can call
+    // the helper directly off `this`.
     val rows = new ArrayBuffer[InternalRow]()
-    DescribeTableExec(output, table, isExtended = false).addBaseDescription(rows)
+    addBaseDescription(rows)
 
     if (isExtended) {
       addPartitionDetails(rows, partitionRow)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala
index af440a161ccb7..88968ac2ff952 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala
@@ -19,13 +19,24 @@ package org.apache.spark.sql.execution.datasources.v2
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}
+import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.command.CommandUtils.isPurgeableExternalTable
 import org.apache.spark.util.ArrayImplicits._
 
 /**
  * Physical plan node for dropping a table.
+ *
+ * Probes `tableExists` upfront so `IF EXISTS` over a missing table is a clean no-op even
+ * on catalogs whose `dropTable` / `purgeTable` does not honor the "return false on missing"
+ * contract (e.g. JDBC catalogs that throw a SQL syntax error, or the default `purgeTable`
+ * that throws `UNSUPPORTED_FEATURE.PURGE_TABLE` unconditionally).
+ *
+ * When the table is absent, falls back to `viewExists` for catalogs that also implement
+ * [[ViewCatalog]] -- distinguishes "wrong type" from "missing" so a `DROP TABLE someView`
+ * on a mixed catalog surfaces `WRONG_COMMAND_FOR_OBJECT_TYPE` ("Use DROP VIEW instead")
+ * rather than a generic "table not found", matching v1 `DropTableCommand(isView = false)`.
  */
 case class DropTableExec(
     catalog: TableCatalog,
@@ -42,9 +53,22 @@ case class DropTableExec(
       } else {
         catalog.dropTable(ident)
       }
-    } else if (!ifExists) {
-      val nameParts = (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq
-      throw QueryCompilationErrors.noSuchTableError(nameParts)
+    } else {
+      val nameParts =
+        (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq
+      catalog match {
+        case vc: ViewCatalog if vc.viewExists(ident) =>
+          throw QueryCompilationErrors.wrongCommandForObjectTypeError(
+            operation = "DROP TABLE",
+            requiredType = "TABLE",
+            objectName = nameParts.map(quoteIfNeeded).mkString("."),
+            foundType = "VIEW",
+            alternative = "DROP VIEW")
+        case _ if !ifExists =>
+          throw QueryCompilationErrors.noSuchTableError(nameParts)
+        case _ =>
+        // IF EXISTS: no-op.
+      }
     }
 
     Seq.empty
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala
new file mode 100644
index 0000000000000..c1655402d368c
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.NoSuchViewException
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * Physical plan node for DROP VIEW on a v2 [[ViewCatalog]]. Calls [[ViewCatalog#dropView]]; if
+ * it returns false and the catalog also implements [[TableCatalog]] with a table at this
+ * identifier, surfaces `WRONG_COMMAND_FOR_OBJECT_TYPE` ("Use DROP TABLE instead") rather than
+ * a generic "view not found" -- matching v1 `DropTableCommand(isView = true)`.
+ */
+case class DropViewExec(
+    catalog: ViewCatalog,
+    ident: Identifier,
+    ifExists: Boolean,
+    invalidateCache: () => Unit) extends LeafV2CommandExec {
+
+  override protected def run(): Seq[InternalRow] = {
+    val dropped = catalog.dropView(ident)
+    if (dropped) {
+      invalidateCache()
+    } else {
+      val nameParts =
+        (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq
+      catalog match {
+        case tc: TableCatalog if tc.tableExists(ident) =>
+          throw QueryCompilationErrors.wrongCommandForObjectTypeError(
+            operation = "DROP VIEW",
+            requiredType = "VIEW",
+            objectName = nameParts.map(quoteIfNeeded).mkString("."),
+            foundType = "TABLE",
+            alternative = "DROP TABLE")
+        case _ if !ifExists =>
+          throw new NoSuchViewException(ident)
+        case _ =>
+        // IF EXISTS: no-op.
+      }
+    }
+    Seq.empty
+  }
+
+  override def output: Seq[Attribute] = Seq.empty
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExec.scala
index 64c937499f742..264a0e954936f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExec.scala
@@ -50,13 +50,18 @@ import org.apache.spark.sql.vectorized.ColumnarBatch
  * @param distributePartitions When true, splits for a key are distributed across the expected
  *                             partitions (padding with empty partitions). When false, all splits
  *                             are replicated to every expected partition for that key.
+ * @param enableSortedMerge When true, uses [[SortedMergeCoalescedRDD]] to perform a k-way merge
+ *                          of the coalesced partitions, preserving the child's output ordering
+ *                          end-to-end. Set by [[EnsureRequirements]] when a parent operator
+ *                          requires the ordering that this node can satisfy via sorted merge.
  */
 case class GroupPartitionsExec(
     child: SparkPlan,
     @transient joinKeyPositions: Option[Seq[Int]] = None,
     @transient expectedPartitionKeys: Option[Seq[(InternalRowComparableWrapper, Int)]] = None,
     @transient reducers: Option[Seq[Option[Reducer[_, _]]]] = None,
-    @transient distributePartitions: Boolean = false
+    @transient distributePartitions: Boolean = false,
+    @transient enableSortedMerge: Boolean = false
   ) extends UnaryExecNode {
 
   override def outputPartitioning: Partitioning = {
@@ -160,6 +165,8 @@ case class GroupPartitionsExec(
 
   @transient lazy val isGrouped: Boolean = groupedPartitionsTuple._2
 
+  @transient private lazy val hasCoalescing: Boolean = groupedPartitions.exists(_._2.size > 1)
+
   // Whether the child subtree is safe to use with SortedMergeCoalescedRDD (k-way merge).
   //
   // --- The general problem ---
@@ -223,10 +230,23 @@ case class GroupPartitionsExec(
       child.outputOrdering.nonEmpty &&
       childIsSafeForKWayMerge
 
+  /**
+   * Returns a copy of this node with k-way merge enabled if it is feasible: the config is on,
+   * the child has an ordering, the child subtree is `SafeForKWayMerge`, and this node actually
+   * coalesces partitions.
+   */
+  def tryEnableSortedMerge(): Option[GroupPartitionsExec] = {
+    Option.when(hasCoalescing && canUseSortedMerge) {
+      val newGroupPartitions = copy(enableSortedMerge = true)
+      newGroupPartitions.copyTagsFrom(this)
+      newGroupPartitions
+    }
+  }
+
   override protected def doExecute(): RDD[InternalRow] = {
     if (groupedPartitions.isEmpty) {
       sparkContext.emptyRDD
-    } else if (canUseSortedMerge && groupedPartitions.exists(_._2.size > 1)) {
+    } else if (hasCoalescing && enableSortedMerge && canUseSortedMerge) {
       val partitionCoalescer = new GroupedPartitionCoalescer(groupedPartitions.map(_._2))
       val rowOrdering = new LazyCodeGenOrdering(child.outputOrdering, child.output)
       new SortedMergeCoalescedRDD[InternalRow](
@@ -241,7 +261,7 @@ case class GroupPartitionsExec(
   }
 
   override def supportsColumnar: Boolean =
-    child.supportsColumnar && !(canUseSortedMerge && groupedPartitions.exists(_._2.size > 1))
+    child.supportsColumnar && !(hasCoalescing && enableSortedMerge && canUseSortedMerge)
 
   override protected def doExecuteColumnar(): RDD[ColumnarBatch] = {
     if (groupedPartitions.isEmpty) {
@@ -258,12 +278,12 @@ case class GroupPartitionsExec(
     copy(child = newChild)
 
   override def outputOrdering: Seq[SortOrder] = {
-    if (groupedPartitions.forall(_._2.size <= 1)) {
+    if (!hasCoalescing) {
       // No coalescing: each output partition is exactly one input partition. The child's
       // within-partition ordering is fully preserved (including any key-derived ordering that
       // `DataSourceV2ScanExecBase` already prepended).
       child.outputOrdering
-    } else if (canUseSortedMerge) {
+    } else if (enableSortedMerge && canUseSortedMerge) {
       // Coalescing with sorted merge: SortedMergeCoalescedRDD performs a k-way merge using the
       // child's ordering, so the full within-partition ordering is preserved end-to-end.
       child.outputOrdering
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
index 526ff843a1496..887f6d832c82b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical.MergeRows.{Context, Copy, Del
 import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.{CodegenSupport, SparkPlan, UnaryExecNode}
-import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
+import org.apache.spark.sql.execution.metric.{SQLLastAttemptMetrics, SQLMetric}
 import org.apache.spark.sql.types.BooleanType
 
 case class MergeRowsExec(
@@ -50,21 +50,21 @@ case class MergeRowsExec(
     child: SparkPlan) extends UnaryExecNode with CodegenSupport {
 
   override lazy val metrics: Map[String, SQLMetric] = Map(
-    "numTargetRowsCopied" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsCopied" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows copied unmodified because they did not match any action"),
-    "numTargetRowsInserted" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsInserted" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows inserted"),
-    "numTargetRowsDeleted" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsDeleted" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows deleted"),
-    "numTargetRowsUpdated" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsUpdated" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows updated"),
-    "numTargetRowsMatchedUpdated" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsMatchedUpdated" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows updated by a matched clause"),
-    "numTargetRowsMatchedDeleted" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsMatchedDeleted" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows deleted by a matched clause"),
-    "numTargetRowsNotMatchedBySourceUpdated" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsNotMatchedBySourceUpdated" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows updated by a not matched by source clause"),
-    "numTargetRowsNotMatchedBySourceDeleted" -> SQLMetrics.createMetric(sparkContext,
+    "numTargetRowsNotMatchedBySourceDeleted" -> SQLLastAttemptMetrics.createMetric(sparkContext,
       "number of target rows deleted by a not matched by source clause"))
 
   @transient override lazy val producedAttributes: AttributeSet = {
@@ -518,6 +518,19 @@ case class MergeRowsExec(
       private val notMatchedBySourceInstructions: Seq[InstructionExec])
     extends Iterator[InternalRow] {
 
+    // Resolve each metric at most once per partition, on first use; longMetric(name) is a map
+    // lookup. See SPARK-56933.
+    private lazy val numTargetRowsCopied = longMetric("numTargetRowsCopied")
+    private lazy val numTargetRowsInserted = longMetric("numTargetRowsInserted")
+    private lazy val numTargetRowsDeleted = longMetric("numTargetRowsDeleted")
+    private lazy val numTargetRowsUpdated = longMetric("numTargetRowsUpdated")
+    private lazy val numTargetRowsMatchedUpdated = longMetric("numTargetRowsMatchedUpdated")
+    private lazy val numTargetRowsMatchedDeleted = longMetric("numTargetRowsMatchedDeleted")
+    private lazy val numTargetRowsNotMatchedBySourceUpdated =
+      longMetric("numTargetRowsNotMatchedBySourceUpdated")
+    private lazy val numTargetRowsNotMatchedBySourceDeleted =
+      longMetric("numTargetRowsNotMatchedBySourceDeleted")
+
     var cachedExtraRow: InternalRow = _
 
     override def hasNext: Boolean = cachedExtraRow != null || rowIterator.hasNext
@@ -579,28 +592,27 @@ case class MergeRowsExec(
 
       null
     }
-  }
 
-  // For group based merge, copy is inserted if row matches no other case
-  private def incrementCopyMetric(): Unit = longMetric("numTargetRowsCopied") += 1
+    private def incrementCopyMetric(): Unit = numTargetRowsCopied += 1
 
-  private def incrementInsertMetric(): Unit = longMetric("numTargetRowsInserted") += 1
+    private def incrementInsertMetric(): Unit = numTargetRowsInserted += 1
 
-  private def incrementDeleteMetric(sourcePresent: Boolean): Unit = {
-    longMetric("numTargetRowsDeleted") += 1
-    if (sourcePresent) {
-      longMetric("numTargetRowsMatchedDeleted") += 1
-    } else {
-      longMetric("numTargetRowsNotMatchedBySourceDeleted") += 1
+    private def incrementDeleteMetric(sourcePresent: Boolean): Unit = {
+      numTargetRowsDeleted += 1
+      if (sourcePresent) {
+        numTargetRowsMatchedDeleted += 1
+      } else {
+        numTargetRowsNotMatchedBySourceDeleted += 1
+      }
     }
-  }
 
-  private def incrementUpdateMetric(sourcePresent: Boolean): Unit = {
-    longMetric("numTargetRowsUpdated") += 1
-    if (sourcePresent) {
-      longMetric("numTargetRowsMatchedUpdated") += 1
-    } else {
-      longMetric("numTargetRowsNotMatchedBySourceUpdated") += 1
+    private def incrementUpdateMetric(sourcePresent: Boolean): Unit = {
+      numTargetRowsUpdated += 1
+      if (sourcePresent) {
+        numTargetRowsMatchedUpdated += 1
+      } else {
+        numTargetRowsNotMatchedBySourceUpdated += 1
+      }
     }
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala
index f81ca001fbe29..bf958ba5aad4d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala
@@ -55,7 +55,7 @@ case class MicroBatchScanExec(
   override lazy val inputRDD: RDD[InternalRow] = {
     val inputRDD = new DataSourceRDD(sparkContext, partitions, readerFactory, supportsColumnar,
       customMetrics)
-    postDriverMetrics()
+    postDriverMetrics(scan.reportDriverMetrics())
     inputRDD
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala
index 29bbfacaea004..c02fdec1fab60 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala
@@ -114,7 +114,7 @@ object OptimizeMetadataOnlyDeleteFromTable extends Rule[LogicalPlan] with Predic
         val command = rd.operation.command
         Some(rd, command, cond, originalTable)
 
-      case wd @ WriteDelta(_, cond, _, originalTable, _, _) =>
+      case wd @ WriteDelta(_, cond, _, originalTable, _, _, _) =>
         val command = wd.operation.command
         Some(wd, command, cond, originalTable)
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala
index 0d34dfc91c39f..8c5396d7eb12d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala
@@ -22,13 +22,14 @@ import scala.collection.mutable
 import org.apache.spark.internal.{Logging, LogKeys}
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, DynamicPruning, DynamicPruningExpression, Expression, ExpressionSet, GetStructField, NamedExpression, PythonUDF, SchemaPruning, SubqueryExpression, V2ExpressionUtils}
+import org.apache.spark.sql.catalyst.plans.logical.SampleMethod
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.{IdentityTransform, SortOrder}
 import org.apache.spark.sql.connector.expressions.filter.Predicate
-import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownLimit, SupportsPushDownOffset, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters, SupportsRuntimeV2Filtering}
+import org.apache.spark.sql.connector.read.{SampleMethod => SampleMethodV2, Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownLimit, SupportsPushDownOffset, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters, SupportsRuntimeV2Filtering}
 import org.apache.spark.sql.execution.{ScalarSubquery => ExecScalarSubquery}
 import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, DataSourceUtils}
 import org.apache.spark.sql.internal.SQLConf
@@ -109,19 +110,25 @@ object PushDownUtils extends Logging {
           }
         }
 
-        val rejectedFilters = r.pushPredicates(translatedFilters.toArray).map { predicate =>
-          DataSourceV2Strategy.rebuildExpressionFromFilter(predicate, translatedFilterToExpr)
-        }
+        val postScanPredicates = r.pushPredicates(translatedFilters.toArray)
 
-        val remainingFilters = (rejectedFilters ++ untranslatableExprs).toSeq
-        val postScanFilters =
+        val finalPostScanFilters =
           if (!partitionFields.exists(_.nonEmpty) || !r.supportsIterativePushdown) {
-            remainingFilters
+            rebuildExpressions(postScanPredicates.toSeq, translatedFilterToExpr) ++
+              untranslatableExprs
           } else {
-            pushPartitionPredicates(r, partitionFields.get, remainingFilters)
+            // Second pass: only filters that were not already pushed down (partially or fully)
+            // in the first pass (not in pushedPredicates) are eligible to be pushed down again.
+            // This avoids pushing the same filter down twice.
+            val (pushedPostScanFilters, notPushedPostScanFilters) =
+              postScanPredicates.toSeq.partition(r.pushedPredicates().toSet.contains)
+            val candidates = rebuildExpressions(notPushedPostScanFilters, translatedFilterToExpr) ++
+              untranslatableExprs
+            pushPartitionPredicates(r, partitionFields.get, candidates) ++
+              rebuildExpressions(pushedPostScanFilters, translatedFilterToExpr)
           }
 
-        val orderedPostScanFilters = prioritizeFilters(postScanFilters,
+        val orderedPostScanFilters = prioritizeFilters(finalPostScanFilters,
           ExpressionSet(untranslatableExprs))
         (Right(r.pushedPredicates.toImmutableArraySeq), orderedPostScanFilters)
       case r: SupportsPushDownCatalystFilters =>
@@ -131,6 +138,18 @@ object PushDownUtils extends Logging {
     }
   }
 
+  /**
+   * Rebuilds the Catalyst [[Expression]]s for a sequence of data source [[Predicate]]s, using the
+   * mapping from translated data source predicates to their original Catalyst expressions.
+   */
+  private def rebuildExpressions(
+      predicates: Seq[Predicate],
+      translatedFilterToExpr: mutable.HashMap[Predicate, Expression]): Seq[Expression] = {
+    predicates.map { predicate =>
+      DataSourceV2Strategy.rebuildExpressionFromFilter(predicate, translatedFilterToExpr)
+    }
+  }
+
   /**
    * Pushes runtime filters to a [[SupportsRuntimeV2Filtering]] scan. Translatable filters are
    * pushed first, followed by [[PartitionPredicate]] if the scan supports iterative filtering.
@@ -398,7 +417,11 @@ object PushDownUtils extends Logging {
     scanBuilder match {
       case s: SupportsPushDownTableSample =>
         s.pushTableSample(
-          sample.lowerBound, sample.upperBound, sample.withReplacement, sample.seed)
+          sample.lowerBound, sample.upperBound, sample.withReplacement, sample.seed,
+          sample.sampleMethod match {
+            case SampleMethod.Bernoulli => SampleMethodV2.BERNOULLI
+            case SampleMethod.System => SampleMethodV2.SYSTEM
+          })
       case _ => false
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RealTimeStreamScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RealTimeStreamScanExec.scala
index 9fff2d91af14c..cb4867cf8c6ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RealTimeStreamScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RealTimeStreamScanExec.scala
@@ -166,7 +166,7 @@ case class RealTimeStreamScanExec(
       supportsColumnar,
       customMetrics
     )
-    postDriverMetrics()
+    postDriverMetrics(scan.reportDriverMetrics())
     inputRDD
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
index d1496bfa4c0c9..4ab328d0b4d55 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, CharVarcharUtils}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog, V1Table}
 import org.apache.spark.sql.connector.expressions.BucketTransform
 import org.apache.spark.sql.execution.LeafExecNode
 import org.apache.spark.sql.types.StructType
@@ -98,7 +98,8 @@ case class ShowCreateTableExec(
       val transforms = new ArrayBuffer[String]
       var bucketSpec = Option.empty[BucketSpec]
       table.partitioning.map {
-        case BucketTransform(numBuckets, col, sortCol) =>
+        case BucketTransform(numBuckets, col, sortCol) if table.isInstanceOf[V1Table] =>
+          require(bucketSpec.isEmpty, "V1Table can not define multiple bucket transforms")
           if (sortCol.isEmpty) {
             bucketSpec = Some(BucketSpec(numBuckets, col.map(_.fieldNames.mkString(".")), Nil))
           } else {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala
index 8ccd126b4b22c..8680785e0815f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala
@@ -22,12 +22,18 @@ import scala.collection.mutable.ArrayBuffer
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.util.StringUtils
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, TableCatalog, TableViewCatalog}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
 import org.apache.spark.sql.execution.LeafExecNode
 
 /**
  * Physical plan node for showing tables.
+ *
+ * For a [[TableViewCatalog]] (one that exposes both tables and views in a shared identifier
+ * namespace), this routes through [[TableViewCatalog#listTableAndViewSummaries]] so that views are
+ * included in the listing -- matching the v1 `SHOW TABLES` semantics where views appear
+ * alongside tables. Pure [[TableCatalog]] catalogs continue to use `listTables` and return
+ * tables only.
  */
 case class ShowTablesExec(
     output: Seq[Attribute],
@@ -37,10 +43,14 @@ case class ShowTablesExec(
   override protected def run(): Seq[InternalRow] = {
     val rows = new ArrayBuffer[InternalRow]()
 
-    val tables = catalog.listTables(namespace.toArray)
-    tables.map { table =>
-      if (pattern.map(StringUtils.filterPattern(Seq(table.name()), _).nonEmpty).getOrElse(true)) {
-        rows += toCatalystRow(table.namespace().quoted, table.name(), isTempView(table, catalog))
+    val identifiers: Array[Identifier] = catalog match {
+      case mc: TableViewCatalog =>
+        mc.listTableAndViewSummaries(namespace.toArray).map(_.identifier())
+      case _ => catalog.listTables(namespace.toArray)
+    }
+    identifiers.foreach { ident =>
+      if (pattern.map(StringUtils.filterPattern(Seq(ident.name()), _).nonEmpty).getOrElse(true)) {
+        rows += toCatalystRow(ident.namespace().quoted, ident.name(), isTempView(ident, catalog))
       }
     }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala
new file mode 100644
index 0000000000000..00927f05842ad
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.util.StringUtils
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
+import org.apache.spark.sql.connector.catalog.ViewCatalog
+import org.apache.spark.sql.execution.LeafExecNode
+
+/**
+ * Physical plan node for SHOW VIEWS on a v2 [[ViewCatalog]]. Enumerates view identifiers via
+ * [[ViewCatalog#listViews]]. v2 catalogs have no temp views, so the {@code isTemporary} column
+ * is always false -- mirroring v1 {@code ShowViewsCommand}, which sets {@code isTemporary=true}
+ * only for local/global temp views that live in the session catalog.
+ */
+case class ShowViewsExec(
+    output: Seq[Attribute],
+    catalog: ViewCatalog,
+    namespace: Seq[String],
+    pattern: Option[String]) extends V2CommandExec with LeafExecNode {
+  override protected def run(): Seq[InternalRow] = {
+    val rows = new ArrayBuffer[InternalRow]()
+    catalog.listViews(namespace.toArray).foreach { ident =>
+      val nameMatches =
+        pattern.forall(p => StringUtils.filterPattern(Seq(ident.name), p).nonEmpty)
+      if (nameMatches) {
+        rows += toCatalystRow(ident.namespace().quoted, ident.name(), false)
+      }
+    }
+    rows.toSeq
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SupportsCustomDriverMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SupportsCustomDriverMetrics.scala
new file mode 100644
index 0000000000000..dc2dd2fb02a1e
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SupportsCustomDriverMetrics.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import org.apache.spark.sql.connector.metric.{CustomMetric, CustomTaskMetric}
+import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
+import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * A mixin for Spark plan nodes that expose driver-side custom metrics reported by a connector.
+ * Implementations declare the connector-owned metrics via [[customMetrics]]; after the underlying
+ * operation has executed they call [[postDriverMetrics]] with the connector's reported values so
+ * they are visible in the SQL UI.
+ *
+ * Nodes that also expose Spark-owned metrics supply them via [[sparkMetrics]]. Names in
+ * [[sparkMetrics]] are reserved: if the connector happens to report a value under the same name,
+ * Spark's value wins and the connector's is dropped.
+ */
+trait SupportsCustomDriverMetrics { self: SparkPlan =>
+
+  /**
+   * The custom metrics the connector supports for this operation, keyed by name.
+   */
+  def customMetrics: Map[String, SQLMetric]
+
+  /**
+   * Spark-owned metrics that should appear alongside the connector-declared ones. Values under
+   * these names are owned by Spark and take precedence on a name collision.
+   */
+  protected def sparkMetrics: Map[String, SQLMetric] = Map.empty
+
+  override lazy val metrics: Map[String, SQLMetric] = customMetrics ++ sparkMetrics
+
+  /**
+   * Converts an array of connector-declared metrics into the map shape [[customMetrics]] uses.
+   */
+  protected def createCustomMetrics(metrics: Array[CustomMetric]): Map[String, SQLMetric] = {
+    metrics.map { m =>
+      m.name -> SQLMetrics.createV2CustomMetric(sparkContext, m)
+    }.toMap
+  }
+
+  /**
+   * Applies the values reported by the connector to the declared metrics and posts them so the
+   * SQL UI reflects the final values. Metrics not declared via [[customMetrics]] are ignored.
+   * Metrics whose name collides with [[sparkMetrics]] are also ignored so Spark-owned values
+   * are preserved.
+   */
+  protected def postDriverMetrics(taskMetrics: Array[CustomTaskMetric]): Unit = {
+    val updated = taskMetrics.flatMap { t =>
+      if (sparkMetrics.contains(t.name())) {
+        // Spark metrics take precedence on collisions.
+        None
+      } else {
+        metrics.get(t.name()).map { metric =>
+          metric.set(t.value())
+          metric
+        }
+      }
+    }
+    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
+    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, updated.toImmutableArraySeq)
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala
index b25059bd7bac1..988aa86db1d34 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.execution.datasources.v2
 
 import org.apache.spark.sql.catalyst.expressions.Literal
-import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic}
+import org.apache.spark.sql.catalyst.plans.logical.{AppendData, InsertOnlyMerge, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic}
 import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.catalog.TableCapability._
@@ -49,6 +49,9 @@ object TableCapabilityCheck extends (LogicalPlan => Unit) {
       case AppendData(r: DataSourceV2Relation, _, _, _, _, _, _) if !supportsBatchWrite(r.table) =>
         throw QueryCompilationErrors.unsupportedAppendInBatchModeError(r.name)
 
+      case InsertOnlyMerge(r: DataSourceV2Relation, _, _, _) if !supportsBatchWrite(r.table) =>
+        throw QueryCompilationErrors.unsupportedAppendInBatchModeError(r.name)
+
       case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _, _, _, _)
         if !r.table.supports(BATCH_WRITE) || !r.table.supports(OVERWRITE_DYNAMIC) =>
         throw QueryCompilationErrors.unsupportedDynamicOverwriteInBatchModeError(r.table)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableSampleInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableSampleInfo.scala
index cb4fb9eb0809a..441ed28c813c0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableSampleInfo.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableSampleInfo.scala
@@ -17,8 +17,11 @@
 
 package org.apache.spark.sql.execution.datasources.v2
 
+import org.apache.spark.sql.catalyst.plans.logical.SampleMethod
+
 case class TableSampleInfo(
     lowerBound: Double,
     upperBound: Double,
     withReplacement: Boolean,
-    seed: Long)
+    seed: Long,
+    sampleMethod: SampleMethod = SampleMethod.Bernoulli)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TruncateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TruncateTableExec.scala
index 948dc1bc8c87c..0bcfc4f364182 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TruncateTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TruncateTableExec.scala
@@ -20,18 +20,28 @@ package org.apache.spark.sql.execution.datasources.v2
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.connector.catalog.TruncatableTable
+import org.apache.spark.sql.execution.metric.SQLMetric
 
 /**
  * Physical plan node for table truncation.
  */
 case class TruncateTableExec(
     table: TruncatableTable,
-    refreshCache: () => Unit) extends LeafV2CommandExec {
+    refreshCache: () => Unit)
+  extends LeafV2CommandExec
+  with SupportsCustomDriverMetrics {
+
+  override lazy val customMetrics: Map[String, SQLMetric] =
+    createCustomMetrics(table.supportedCustomMetrics())
 
   override def output: Seq[Attribute] = Seq.empty
 
   override protected def run(): Seq[InternalRow] = {
-    if (table.truncateTable()) refreshCache()
-    Seq.empty
+    try {
+      if (table.truncateTable()) refreshCache()
+      Seq.empty
+    } finally {
+      postDriverMetrics(table.reportDriverMetrics())
+    }
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala
index 3eadffb8f0ae4..9e3f5e3e1d4f5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala
@@ -23,8 +23,8 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.classic.Dataset
 import org.apache.spark.sql.connector.catalog.SupportsWrite
 import org.apache.spark.sql.connector.write.V1Write
-import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
-import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
+import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.sources.InsertableRelation
 
 /**
@@ -56,13 +56,15 @@ case class OverwriteByExpressionExecV1(
     write: V1Write) extends V1FallbackWriters
 
 /** Some helper interfaces that use V2 write semantics through the V1 writer interface. */
-sealed trait V1FallbackWriters extends LeafV2CommandExec with SupportsV1Write {
+sealed trait V1FallbackWriters
+  extends LeafV2CommandExec
+  with SupportsV1Write
+  with SupportsCustomDriverMetrics {
+
   override def output: Seq[Attribute] = Nil
 
-  override val metrics: Map[String, SQLMetric] =
-    write.supportedCustomMetrics().map { customMetric =>
-      customMetric.name() -> SQLMetrics.createV2CustomMetric(sparkContext, customMetric)
-    }.toMap
+  override lazy val customMetrics: Map[String, SQLMetric] =
+    createCustomMetrics(write.supportedCustomMetrics())
 
   def table: SupportsWrite
   def refreshCache: () => Unit
@@ -75,12 +77,7 @@ sealed trait V1FallbackWriters extends LeafV2CommandExec with SupportsV1Write {
 
       Nil
     } finally {
-      write.reportDriverMetrics().foreach { customTaskMetric =>
-        metrics.get(customTaskMetric.name()).foreach(_.set(customTaskMetric.value()))
-      }
-
-      val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
-      SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toSeq)
+      postDriverMetrics(write.reportDriverMetrics())
     }
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
index c0b72123065f7..a1c69847c509b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
@@ -23,11 +23,12 @@ import scala.collection.mutable
 
 import org.apache.spark.{SparkException, SparkIllegalArgumentException}
 import org.apache.spark.internal.LogKeys.{AGGREGATE_FUNCTIONS, COLUMN_NAMES, GROUP_BY_EXPRS, JOIN_CONDITION, JOIN_TYPE, POST_SCAN_FILTERS, PUSHED_FILTERS, RELATION_NAME, RELATION_OUTPUT}
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions.{aggregate, Alias, And, Attribute, AttributeMap, AttributeReference, AttributeSet, Cast, Expression, ExpressionSet, ExprId, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.optimizer.CollapseProject
 import org.apache.spark.sql.catalyst.planning.{PhysicalOperation, ScanOperation}
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Join, LeafNode, Limit, LimitAndOffset, LocalLimit, LogicalPlan, Offset, OffsetAndLimit, Project, Sample, Sort}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Join, LeafNode, Limit, LimitAndOffset, LocalLimit, LogicalPlan, Offset, OffsetAndLimit, Project, Sample, SampleMethod, Sort}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
 import org.apache.spark.sql.connector.expressions.{SortOrder => V2SortOrder}
@@ -150,6 +151,18 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
         rightProjections.forall(_.isInstanceOf[AttributeReference]) &&
         // Cross joins are not supported because they increase the amount of data.
         condition.isDefined &&
+        // Do not push down join if either side has a pushed sample with
+        // fraction < 1, because the merged scan builder would silently
+        // discard it and change the result. At fraction = 1 without
+        // replacement the sample is a no-op on the result set, so dropping
+        // it is safe. With replacement (Poisson sampling), even fraction 1
+        // can emit each row 0, 1, 2, ... times, so it is not a no-op.
+        // TODO(SPARK-56504): Extend SupportsPushDownJoin to accept pushed
+        //   samples so sources supporting both can handle the composition.
+        leftHolder.pushedSample.forall(s =>
+          !s.withReplacement && s.upperBound - s.lowerBound >= 1.0) &&
+        rightHolder.pushedSample.forall(s =>
+          !s.withReplacement && s.upperBound - s.lowerBound >= 1.0) &&
         lBuilder.isOtherSideCompatibleForJoin(rBuilder) =>
       // Process left and right columns in original order
       val (leftSideRequiredColumnsWithAliases, rightSideRequiredColumnsWithAliases) =
@@ -844,15 +857,26 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper {
           sample.lowerBound,
           sample.upperBound,
           sample.withReplacement,
-          sample.seed.getOrElse((math.random() * 1000).toLong))
+          // TODO(SPARK-56573): The * 1000 limits the seed to only 1000 distinct values.
+          //   Kept here for consistency with SampleExec.resolvedSeed; will be fixed
+          //   across all call sites in SPARK-56573.
+          sample.seed.getOrElse((math.random() * 1000).toLong),
+          sampleMethod = sample.sampleMethod)
         val pushed = PushDownUtils.pushTableSample(sHolder.builder, tableSample)
         if (pushed) {
           sHolder.pushedSample = Some(tableSample)
           sample.child
+        } else if (sample.sampleMethod == SampleMethod.System) {
+          throw new AnalysisException(
+            errorClass = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM",
+            messageParameters = Map.empty)
         } else {
           sample
         }
-
+      case _ if sample.sampleMethod == SampleMethod.System =>
+        throw new AnalysisException(
+          errorClass = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_NO_SCAN",
+          messageParameters = Map.empty)
       case _ => sample
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
index d21b5c730f0ca..c268cd963b802 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
@@ -334,8 +334,9 @@ class V2SessionCatalog(catalog: SessionCatalog)
   private def dropTableInternal(ident: Identifier, purge: Boolean = false): Boolean = {
     try {
       loadTable(ident) match {
-        case V1Table(v1Table) if v1Table.tableType == CatalogTableType.VIEW &&
-            !SQLConf.get.getConf(SQLConf.DROP_TABLE_VIEW_ENABLED) =>
+        case V1Table(v1Table)
+            if v1Table.isViewLike &&
+              !SQLConf.get.getConf(SQLConf.DROP_TABLE_VIEW_ENABLED) =>
           throw QueryCompilationErrors.wrongCommandForObjectTypeError(
             operation = "DROP TABLE",
             requiredType = s"${CatalogTableType.EXTERNAL.name} or" +
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2TableRefreshUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2TableRefreshUtil.scala
index 151329de9e6f2..46359f1fa8a2d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2TableRefreshUtil.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2TableRefreshUtil.scala
@@ -36,6 +36,7 @@ private[sql] object V2TableRefreshUtil extends SQLConfHelper with Logging {
    *
    * This method reloads table metadata from the catalog and validates:
    *  - Table identity: Ensures table ID has not changed
+   *  - Column IDs: Verifies column IDs have not changed
    *  - Data columns: Verifies captured columns align with the current schema
    *  - Metadata columns: Checks metadata column consistency
    *
@@ -62,6 +63,7 @@ private[sql] object V2TableRefreshUtil extends SQLConfHelper with Logging {
    *
    * This method reloads table metadata from the catalog and validates:
    *  - Table identity: Ensures table ID has not changed
+   *  - Column IDs: Verifies column IDs have not changed
    *  - Data columns: Verifies captured columns align with the current schema
    *  - Metadata columns: Checks metadata column consistency
    *
@@ -95,6 +97,7 @@ private[sql] object V2TableRefreshUtil extends SQLConfHelper with Logging {
           }
         })
         validateTableIdentity(currentTable, r)
+        validateColumnIds(currentTable, r)
         validateDataColumns(currentTable, r, schemaValidationMode)
         validateMetadataColumns(currentTable, r, schemaValidationMode)
         r.copy(table = currentTable)
@@ -119,11 +122,15 @@ private[sql] object V2TableRefreshUtil extends SQLConfHelper with Logging {
   }
 
   private def validateTableIdentity(currentTable: Table, relation: DataSourceV2Relation): Unit = {
-    if (relation.table.id != null && relation.table.id != currentTable.id) {
-      throw QueryCompilationErrors.tableIdChangedAfterAnalysis(
-        relation.name,
-        capturedTableId = relation.table.id,
-        currentTableId = currentTable.id)
+    V2TableUtil.validateTableId(relation.name, relation.table.id, currentTable)
+  }
+
+  private def validateColumnIds(
+      currentTable: Table,
+      relation: DataSourceV2Relation): Unit = {
+    val errors = V2TableUtil.validateColumnIds(currentTable, relation)
+    if (errors.nonEmpty) {
+      throw QueryCompilationErrors.columnIdMismatchAfterAnalysis(relation.name, errors)
     }
   }
 
@@ -133,7 +140,7 @@ private[sql] object V2TableRefreshUtil extends SQLConfHelper with Logging {
       mode: SchemaValidationMode): Unit = {
     val errors = V2TableUtil.validateCapturedColumns(currentTable, relation, mode)
     if (errors.nonEmpty) {
-      throw QueryCompilationErrors.columnsChangedAfterAnalysis(relation.name, errors)
+      throw QueryCompilationErrors.columnsMissingOrAddedAfterAnalysis(relation.name, errors)
     }
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ViewInspectionExecs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ViewInspectionExecs.scala
new file mode 100644
index 0000000000000..d1ceeba833ea7
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ViewInspectionExecs.scala
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import scala.collection.mutable.ArrayBuffer
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper}
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIfNeeded, ResolveDefaultColumns}
+import org.apache.spark.sql.catalyst.util.ResolveDefaultColumnsUtils.CURRENT_DEFAULT_COLUMN_METADATA_KEY
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, TableCatalog, TableSummary, ViewInfo}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+
+/**
+ * Read-side v2 view execs. Each receives the typed [[ViewInfo]] resolved at analysis time
+ * (carried on `ResolvedPersistentView.info`) and formats output rows directly from it --
+ * matching the way v2 table inspection execs (e.g. `ShowCreateTableExec`, `DescribeTableExec`)
+ * consume the [[org.apache.spark.sql.connector.catalog.Table]] attached to `ResolvedTable`.
+ *
+ * Only non-session v2 views land here; the session-catalog path is rewritten to v1 commands by
+ * `ResolveSessionCatalog` before strategy fires. The catalog name and identifier are passed
+ * alongside `viewInfo` for output formatting (qualified names, EXTENDED block headers).
+ */
+
+/**
+ * Physical plan node for SHOW CREATE TABLE on a v2 view. Reconstructs the {@code CREATE VIEW}
+ * statement directly from the typed [[ViewInfo]] -- the column list comes from
+ * [[ViewInfo#schema]], the body from [[ViewInfo#queryText]], the binding mode from
+ * [[ViewInfo#schemaMode]], and the user TBLPROPERTIES from [[ViewInfo#properties]] (with the
+ * reserved-keys filter applied so internal entries don't leak into the rendered DDL).
+ */
+case class ShowCreateV2ViewExec(
+    output: Seq[Attribute],
+    quotedName: String,
+    viewInfo: ViewInfo) extends LeafV2CommandExec with SQLConfHelper {
+
+  override protected def run(): Seq[InternalRow] = {
+    val builder = new StringBuilder
+    builder ++= s"CREATE VIEW $quotedName "
+    showViewDataColumns(builder)
+    Option(viewInfo.properties.get(TableCatalog.PROP_COMMENT)).foreach { c =>
+      builder ++= s"COMMENT '${escapeSingleQuotedString(c)}'\n"
+    }
+    Option(viewInfo.properties.get(TableCatalog.PROP_COLLATION)).foreach { c =>
+      builder ++= s"DEFAULT COLLATION $c\n"
+    }
+    showViewProperties(builder)
+    if (conf.viewSchemaBindingEnabled) {
+      Option(viewInfo.schemaMode).foreach { sm =>
+        builder ++= s"WITH SCHEMA $sm\n"
+      }
+    }
+    builder ++= s"AS ${viewInfo.queryText}\n"
+    Seq(toCatalystRow(builder.toString))
+  }
+
+  private def showViewDataColumns(builder: StringBuilder): Unit = {
+    val schema = viewInfo.schema
+    if (schema.nonEmpty) {
+      val cols = schema.map { f =>
+        val comment = f.getComment().map(c => s" COMMENT '${escapeSingleQuotedString(c)}'")
+        s"${quoteIfNeeded(f.name)}${comment.getOrElse("")}"
+      }
+      builder ++= cols.mkString("(\n  ", ",\n  ", ")\n")
+    }
+  }
+
+  private def showViewProperties(builder: StringBuilder): Unit = {
+    // Drop the reserved keys that either already appear as dedicated DDL clauses
+    // (PROP_COMMENT / PROP_COLLATION) or are otherwise managed outside user TBLPROPERTIES
+    // (PROP_OWNER, PROP_TABLE_TYPE, etc.). Mirrors the v1 SHOW CREATE TABLE filter, which
+    // hides the same first-class fields from the rendered TBLPROPERTIES clause.
+    val viewProps = viewInfo.properties.asScala
+      .filter { case (k, _) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) }
+    if (viewProps.nonEmpty) {
+      val props = viewProps.toSeq.sortBy(_._1).map { case (key, value) =>
+        s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
+      }
+      builder ++= s"TBLPROPERTIES ${props.mkString("(\n  ", ",\n  ", ")\n")}"
+    }
+  }
+}
+
+/**
+ * Physical plan node for SHOW TBLPROPERTIES on a v2 view. Returns the user-facing properties
+ * from [[ViewInfo#properties]] -- reserved first-class keys (PROP_COMMENT, PROP_COLLATION,
+ * PROP_OWNER, PROP_TABLE_TYPE, ...) are filtered out so users see only what they (or the
+ * catalog) explicitly set, matching v1 `SHOW TBLPROPERTIES` on a session-catalog view (which
+ * hides these because v1 stores them in typed `CatalogTable` fields rather than `properties`).
+ * A directly-requested reserved key still returns its value so users can ask for it by name.
+ */
+case class ShowV2ViewPropertiesExec(
+    output: Seq[Attribute],
+    quotedName: String,
+    viewInfo: ViewInfo,
+    propertyKey: Option[String]) extends LeafV2CommandExec with SQLConfHelper {
+
+  override protected def run(): Seq[InternalRow] = {
+    val rawProps = viewInfo.properties.asScala.toMap
+    val redacted = conf.redactOptions(rawProps)
+    propertyKey match {
+      case Some(p) =>
+        val propValue = redacted.getOrElse(p,
+          s"View $quotedName does not have property: $p")
+        if (output.length == 1) {
+          Seq(toCatalystRow(propValue))
+        } else {
+          Seq(toCatalystRow(p, propValue))
+        }
+      case None =>
+        redacted
+          .filter { case (k, _) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) }
+          .toSeq.sortBy(_._1).map(p => toCatalystRow(p._1, p._2))
+    }
+  }
+}
+
+/**
+ * Physical plan node for SHOW COLUMNS on a v2 view. Returns one row per top-level field in
+ * [[ViewInfo#schema]].
+ */
+case class ShowV2ViewColumnsExec(
+    output: Seq[Attribute],
+    viewInfo: ViewInfo) extends LeafV2CommandExec {
+
+  override protected def run(): Seq[InternalRow] = {
+    viewInfo.schema.map(c => toCatalystRow(c.name)).toSeq
+  }
+}
+
+/**
+ * Physical plan node for DESCRIBE TABLE on a v2 view. Schema rows first; when EXTENDED is
+ * specified, an additional `# Detailed View Information` block emits the v2-native fields:
+ * the resolved-identifier components via [[DescribeIdentifierRows#addIdentifierRows]] (which
+ * also surfaces a v1-compat `Database` row for single-segment namespaces), followed by view
+ * text, captured creation context, schema-binding mode, query column names, and user
+ * TBLPROPERTIES. v2 views are unpartitioned by definition, so the partition-spec branch from
+ * v1 `DescribeTableCommand` is unreachable here.
+ */
+case class DescribeV2ViewExec(
+    output: Seq[Attribute],
+    catalogName: String,
+    identifier: Identifier,
+    viewInfo: ViewInfo,
+    isExtended: Boolean) extends DescribeIdentifierRows with SQLConfHelper {
+
+  override protected def run(): Seq[InternalRow] = {
+    val result = new ArrayBuffer[InternalRow]
+    viewInfo.schema.foreach { col =>
+      result += toCatalystRow(col.name, col.dataType.simpleString, col.getComment().orNull)
+    }
+    if (isExtended) {
+      result += toCatalystRow("", "", "")
+      result += toCatalystRow("# Detailed View Information", "", "")
+      addIdentifierRows(result, catalogName, identifier, entityLabel = "View")
+      // Surface the view sub-kind so users see whether they're looking at a plain VIEW
+      // or a sub-kind like METRIC_VIEW. `ViewInfo`'s constructor unconditionally stamps
+      // `PROP_TABLE_TYPE` (defaulting to `VIEW`), so this row is always present and
+      // matches v1 `CatalogTable.toJsonLinkedHashMap`'s `Type` row for parity.
+      result += toCatalystRow(
+        "Type",
+        Option(viewInfo.properties.get(TableCatalog.PROP_TABLE_TYPE))
+          .getOrElse(TableSummary.VIEW_TABLE_TYPE),
+        "")
+      // Promote first-class reserved fields (Owner / Comment / Collation) to top-level rows
+      // before the EXTENDED Properties block, mirroring v1 `CatalogTable.toJsonLinkedHashMap`
+      // which renders these as their own rows rather than burying them in `Table Properties`.
+      Option(viewInfo.properties.get(TableCatalog.PROP_OWNER)).filter(_.nonEmpty).foreach { o =>
+        result += toCatalystRow("Owner", o, "")
+      }
+      Option(viewInfo.properties.get(TableCatalog.PROP_COMMENT)).foreach { c =>
+        result += toCatalystRow("Comment", c, "")
+      }
+      Option(viewInfo.properties.get(TableCatalog.PROP_COLLATION)).foreach { c =>
+        result += toCatalystRow("Collation", c, "")
+      }
+      result += toCatalystRow("View Text", viewInfo.queryText, "")
+      Option(viewInfo.currentCatalog).foreach { c =>
+        result += toCatalystRow("View Current Catalog", c, "")
+      }
+      val ns = viewInfo.currentNamespace
+      if (ns != null && ns.nonEmpty) {
+        result += toCatalystRow(
+          "View Current Namespace", ns.map(quoteIfNeeded).mkString("."), "")
+      }
+      Option(viewInfo.schemaMode).foreach { sm =>
+        result += toCatalystRow("View Schema Mode", sm, "")
+      }
+      val queryColumns = viewInfo.queryColumnNames
+      if (queryColumns != null && queryColumns.nonEmpty) {
+        result += toCatalystRow(
+          "View Query Output Columns", queryColumns.mkString("[", ", ", "]"), "")
+      }
+      // Filter the same reserved set as `ShowV2ViewPropertiesExec` so the EXTENDED
+      // `Properties` row mirrors `SHOW TBLPROPERTIES` and matches v1 (which hides these
+      // first-class fields because they live in typed `CatalogTable` fields).
+      val userProps = viewInfo.properties.asScala
+        .filter { case (k, _) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) }
+      if (userProps.nonEmpty) {
+        val props = conf.redactOptions(userProps.toMap).toSeq.sortBy(_._1).map {
+          case (k, v) => s"$k=$v"
+        }.mkString("[", ", ", "]")
+        result += toCatalystRow("Properties", props, "")
+      }
+    }
+    ResolveDefaultColumns.getDescribeMetadata(viewInfo.schema).foreach { row =>
+      result += toCatalystRow(row._1, row._2, row._3)
+    }
+    result.toSeq
+  }
+}
+
+/**
+ * Physical plan node for DESCRIBE TABLE ... COLUMN on a v2 view. The column nameParts are
+ * extracted at strategy time from the (already-resolved) column expression on
+ * `DescribeColumn`, so this exec doesn't have to deal with resolution. v2 views don't carry
+ * column statistics, so the EXTENDED branch in v1 emits `NULL` for every stat row -- we
+ * follow the same shape.
+ */
+case class DescribeV2ViewColumnExec(
+    output: Seq[Attribute],
+    viewInfo: ViewInfo,
+    colNameParts: Seq[String],
+    isExtended: Boolean) extends LeafV2CommandExec with SQLConfHelper {
+
+  override protected def run(): Seq[InternalRow] = {
+    val resolver = conf.resolver
+    val colName = colNameParts.mkString(".")
+    if (colNameParts.length > 1) {
+      throw QueryCompilationErrors.commandNotSupportNestedColumnError(
+        "DESC TABLE COLUMN", colName)
+    }
+    val field = viewInfo.schema.fields
+      .find(f => resolver(f.name, colNameParts.head))
+      .getOrElse(throw QueryCompilationErrors.columnNotFoundError(colName))
+    val dataType = field.dataType.catalogString
+    val comment = field.getComment().orNull
+    val rows = ArrayBuffer[InternalRow](
+      toCatalystRow("col_name", field.name),
+      toCatalystRow("data_type", dataType),
+      toCatalystRow("comment", if (comment == null) "NULL" else comment)
+    )
+    if (isExtended) {
+      // v2 views carry no column stats; emit NULL placeholders matching v1 output shape.
+      Seq("min", "max", "num_nulls", "distinct_count", "avg_col_len", "max_col_len",
+        "histogram").foreach { name =>
+        rows += toCatalystRow(name, "NULL")
+      }
+      if (field.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY)) {
+        rows += toCatalystRow("default",
+          field.metadata.getString(CURRENT_DEFAULT_COLUMN_METADATA_KEY))
+      }
+    }
+    rows.toSeq
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala
index d8e871bcf4824..be8e96e8034d2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala
@@ -22,11 +22,11 @@ import java.util.UUID
 import scala.jdk.CollectionConverters._
 
 import org.apache.spark.sql.catalyst.expressions.PredicateHelper
-import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceData, WriteDelta}
+import org.apache.spark.sql.catalyst.plans.logical.{AppendData, InsertOnlyMerge, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceData, WriteDelta}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._
 import org.apache.spark.sql.catalyst.util.WriteDeltaProjections
-import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table}
+import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.connector.write.{DeltaWriteBuilder, LogicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwriteV2, SupportsTruncate, Write, WriteBuilder}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
@@ -51,6 +51,13 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper {
       val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog)
       a.copy(write = Some(write), query = newQuery)
 
+    case m @ InsertOnlyMerge(r: DataSourceV2Relation, query, None, _) =>
+      val writeOptions = r.options.asCaseSensitiveMap.asScala.toMap
+      val writeBuilder = newWriteBuilder(r.table, writeOptions, query.schema)
+      val write = writeBuilder.build()
+      val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog)
+      m.copy(write = Some(write), query = newQuery)
+
     case o @ OverwriteByExpression(
         r: DataSourceV2Relation, deleteExpr, query, options, _, _, None, _) =>
       // fail if any filter cannot be converted. correctness depends on removing all matching data.
@@ -91,17 +98,15 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper {
       o.copy(write = Some(write), query = newQuery)
 
     case WriteToMicroBatchDataSource(
-        relationOpt, table, query, queryId, options, outputMode, Some(batchId)) =>
-      val writeOptions = mergeOptions(
-        options,
-        relationOpt.map(r => r.options.asCaseSensitiveMap.asScala.toMap).getOrElse(Map.empty))
+        r: DataSourceV2Relation, query, queryId, options, outputMode, Some(batchId)) =>
+      val table = r.table
+      val writeOptions = mergeOptions(options, r.options.asCaseSensitiveMap.asScala.toMap)
       val writeBuilder = newWriteBuilder(table, writeOptions, query.schema, queryId = queryId)
       val write = buildWriteForMicroBatch(table, writeBuilder, outputMode)
       val microBatchWrite = new MicroBatchWrite(batchId, write.toStreaming)
       val customMetrics = write.supportedCustomMetrics.toImmutableArraySeq
-      val funCatalogOpt = relationOpt.flatMap(_.funCatalog)
-      val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, funCatalogOpt)
-      WriteToDataSourceV2(relationOpt, microBatchWrite, newQuery, customMetrics)
+      val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog)
+      WriteToDataSourceV2(Some(r), microBatchWrite, newQuery, customMetrics)
 
     case rd @ ReplaceData(r: DataSourceV2Relation, _, query, _, projections, _, None) =>
       val rowSchema = projections.rowProjection.schema
@@ -112,7 +117,7 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper {
       val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog)
       rd.copy(write = Some(write), query = newQuery)
 
-    case wd @ WriteDelta(r: DataSourceV2Relation, _, query, _, projections, None) =>
+    case wd @ WriteDelta(r: DataSourceV2Relation, _, query, _, projections, _, None) =>
       val writeOptions = mergeOptions(Map.empty, r.options.asCaseSensitiveMap.asScala.toMap)
       val deltaWriteBuilder = newDeltaWriteBuilder(r.table, writeOptions, projections)
       val deltaWrite = deltaWriteBuilder.build()
@@ -131,7 +136,7 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper {
   }
 
   private def buildWriteForMicroBatch(
-      table: SupportsWrite,
+      table: Table,
       writeBuilder: WriteBuilder,
       outputMode: OutputMode): Write = {
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
index 6bb1eb6f4b6d6..33709fbd5f5a7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala
@@ -26,21 +26,23 @@ import org.apache.spark.sql.catalyst.{InternalRow, ProjectingInternalRow}
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Literal}
 import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, TableSpec, UnaryNode}
+import org.apache.spark.sql.catalyst.transactions.TransactionUtils
 import org.apache.spark.sql.catalyst.util.{removeInternalMetadata, CharVarcharUtils, ReplaceDataProjections, WriteDeltaProjections}
 import org.apache.spark.sql.catalyst.util.RowDeltaUtils.{COPY_OPERATION, DELETE_OPERATION, INSERT_OPERATION, REINSERT_OPERATION, UPDATE_OPERATION}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Column, Identifier, StagedTable, StagingTableCatalog, Table, TableCatalog, TableInfo, TableWritePrivilege}
+import org.apache.spark.sql.connector.catalog.transactions.Transaction
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.connector.metric.CustomMetric
-import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, DeleteSummaryImpl, DeltaWrite, DeltaWriter, MergeSummaryImpl, PhysicalWriteInfoImpl, RowLevelOperation, RowLevelOperationTable, UpdateSummaryImpl, Write, WriterCommitMessage, WriteSummary}
+import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, DeleteSummaryImpl, DeltaWrite, DeltaWriter, InsertSummaryImpl, MergeSummaryImpl, PhysicalWriteInfoImpl, RowLevelOperation, RowLevelOperationTable, UpdateSummaryImpl, Write, WriterCommitMessage, WriteSummary}
 import org.apache.spark.sql.connector.write.RowLevelOperation.Command._
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SQLExecution, UnaryExecNode}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
-import org.apache.spark.sql.execution.metric.{CustomMetrics, SQLMetric, SQLMetrics}
+import org.apache.spark.sql.execution.metric.{CustomMetrics, SQLLastAttemptMetric, SQLLastAttemptMetrics, SQLMetric, SQLMetrics}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.SchemaValidationMode.PROHIBIT_CHANGES
-import org.apache.spark.util.{LongAccumulator, Utils}
 import org.apache.spark.util.ArrayImplicits._
+import org.apache.spark.util.Utils
 
 /**
  * Deprecated logical plan for writing data into data source v2. This is being replaced by more
@@ -74,7 +76,12 @@ case class CreateTableAsSelectExec(
     query: LogicalPlan,
     tableSpec: TableSpec,
     writeOptions: Map[String, String],
-    ifNotExists: Boolean) extends V2CreateTableAsSelectBaseExec {
+    ifNotExists: Boolean,
+    transaction: Option[Transaction] = None)
+  extends V2CreateTableAsSelectBaseExec with TransactionalExec {
+
+  override def withTransaction(txn: Option[Transaction]): CreateTableAsSelectExec =
+    copy(transaction = txn)
 
   val properties = CatalogV2Util.convertTableProperties(tableSpec)
 
@@ -92,7 +99,9 @@ case class CreateTableAsSelectExec(
       .build()
     val table = Option(catalog.createTable(ident, tableInfo))
       .getOrElse(catalog.loadTable(ident, Set(TableWritePrivilege.INSERT).asJava))
-    writeToTable(catalog, table, writeOptions, ident, query, overwrite = false)
+    val result = writeToTable(catalog, table, writeOptions, ident, query, overwrite = false)
+    transaction.foreach(TransactionUtils.commit)
+    result
   }
 }
 
@@ -112,7 +121,8 @@ case class AtomicCreateTableAsSelectExec(
     query: LogicalPlan,
     tableSpec: TableSpec,
     writeOptions: Map[String, String],
-    ifNotExists: Boolean) extends V2CreateTableAsSelectBaseExec {
+    ifNotExists: Boolean)
+  extends V2CreateTableAsSelectBaseExec {
 
   val properties = CatalogV2Util.convertTableProperties(tableSpec)
 
@@ -155,8 +165,12 @@ case class ReplaceTableAsSelectExec(
     tableSpec: TableSpec,
     writeOptions: Map[String, String],
     orCreate: Boolean,
-    invalidateCache: (TableCatalog, Identifier) => Unit)
-  extends V2CreateTableAsSelectBaseExec {
+    invalidateCache: (TableCatalog, Identifier) => Unit,
+    transaction: Option[Transaction] = None)
+  extends V2CreateTableAsSelectBaseExec with TransactionalExec {
+
+  override def withTransaction(txn: Option[Transaction]): ReplaceTableAsSelectExec =
+    copy(transaction = txn)
 
   val properties = CatalogV2Util.convertTableProperties(tableSpec)
 
@@ -192,9 +206,11 @@ case class ReplaceTableAsSelectExec(
       .build()
     val table = Option(catalog.createTable(ident, tableInfo))
       .getOrElse(catalog.loadTable(ident, Set(TableWritePrivilege.INSERT).asJava))
-    writeToTable(
+    val result = writeToTable(
       catalog, table, writeOptions, ident, refreshedQuery,
       overwrite = true, refreshPhaseEnabled = false)
+    transaction.foreach(TransactionUtils.commit)
+    result
   }
 }
 
@@ -272,9 +288,43 @@ case class AtomicReplaceTableAsSelectExec(
 case class AppendDataExec(
     query: SparkPlan,
     refreshCache: () => Unit,
-    write: Write) extends V2ExistingTableWriteExec {
+    write: Write,
+    tableName: String,
+    transaction: Option[Transaction] = None) extends V2ExistingTableWriteExec {
+  override def withTransaction(txn: Option[Transaction]): AppendDataExec = copy(transaction = txn)
   override protected def withNewChildInternal(newChild: SparkPlan): AppendDataExec =
     copy(query = newChild)
+
+  override protected def getWriteSummary(): Option[WriteSummary] =
+    Some(InsertSummaryImpl(numInsertedRows = numOutputRowsMetric.value))
+}
+
+/**
+ * Physical plan for an insert-only MERGE rewrite. Behaves like [[AppendDataExec]] but emits a
+ * [[org.apache.spark.sql.connector.write.MergeSummary]] so commit metadata reports the operation
+ * as a MERGE, with all output rows accounted for as inserts.
+ */
+case class InsertOnlyMergeExec(
+    query: SparkPlan,
+    refreshCache: () => Unit,
+    write: Write,
+    tableName: String,
+    transaction: Option[Transaction] = None) extends V2ExistingTableWriteExec {
+  override def withTransaction(txn: Option[Transaction]): InsertOnlyMergeExec =
+    copy(transaction = txn)
+  override protected def withNewChildInternal(newChild: SparkPlan): InsertOnlyMergeExec =
+    copy(query = newChild)
+
+  override protected def getWriteSummary(): Option[WriteSummary] =
+    Some(MergeSummaryImpl(
+      numTargetRowsCopied = 0L,
+      numTargetRowsDeleted = 0L,
+      numTargetRowsUpdated = 0L,
+      numTargetRowsInserted = numOutputRowsMetric.value,
+      numTargetRowsMatchedUpdated = 0L,
+      numTargetRowsMatchedDeleted = 0L,
+      numTargetRowsNotMatchedBySourceUpdated = 0L,
+      numTargetRowsNotMatchedBySourceDeleted = 0L))
 }
 
 /**
@@ -290,7 +340,11 @@ case class AppendDataExec(
 case class OverwriteByExpressionExec(
     query: SparkPlan,
     refreshCache: () => Unit,
-    write: Write) extends V2ExistingTableWriteExec {
+    write: Write,
+    tableName: String,
+    transaction: Option[Transaction] = None) extends V2ExistingTableWriteExec {
+  override def withTransaction(txn: Option[Transaction]): OverwriteByExpressionExec =
+    copy(transaction = txn)
   override protected def withNewChildInternal(newChild: SparkPlan): OverwriteByExpressionExec =
     copy(query = newChild)
 }
@@ -307,7 +361,11 @@ case class OverwriteByExpressionExec(
 case class OverwritePartitionsDynamicExec(
     query: SparkPlan,
     refreshCache: () => Unit,
-    write: Write) extends V2ExistingTableWriteExec {
+    write: Write,
+    tableName: String,
+    transaction: Option[Transaction] = None) extends V2ExistingTableWriteExec {
+  override def withTransaction(txn: Option[Transaction]): OverwritePartitionsDynamicExec =
+    copy(transaction = txn)
   override protected def withNewChildInternal(newChild: SparkPlan): OverwritePartitionsDynamicExec =
     copy(query = newChild)
 }
@@ -320,39 +378,42 @@ case class ReplaceDataExec(
     refreshCache: () => Unit,
     projections: ReplaceDataProjections,
     write: Write,
-    rowLevelCommand: RowLevelOperation.Command) extends RowLevelWriteExec {
+    rowLevelCommand: RowLevelOperation.Command,
+    tableName: String,
+    transaction: Option[Transaction] = None) extends RowLevelWriteExec {
 
   override def writingTask: WritingSparkTask[_] = {
     projections.metadataProjection match {
       case Some(metadataProj) =>
-        DataAndMetadataWritingSparkTask(projections.rowProjection, metadataProj, operationMetrics)
+        DataAndMetadataWritingSparkTask(projections.rowProjection, metadataProj, sparkMetrics)
       case None =>
-        DataWithProjectionWritingSparkTask(projections.rowProjection, operationMetrics)
+        DataWithProjectionWritingSparkTask(projections.rowProjection, sparkMetrics)
     }
   }
 
+  override def withTransaction(txn: Option[Transaction]): ReplaceDataExec = copy(transaction = txn)
   override protected def withNewChildInternal(newChild: SparkPlan): ReplaceDataExec = {
     copy(query = newChild)
   }
 
-  override protected def getWriteSummary(query: SparkPlan): Option[WriteSummary] = {
-    if (rowLevelCommand == DELETE) {
-      // DELETE ReplaceData plans filter out the deleted rows early in the plan, and they don't
-      // reach this node. We need to calculate this value as numScannedRows - numCopiedRows.
-      val numScannedRows = collectFirst(query) {
-        case b: BatchScanExec if b.table.isInstanceOf[RowLevelOperationTable] =>
-          getMetricValue(b.metrics, "numOutputRows")
-      }
-      val numCopiedRows = getMetricValue(metrics, "numCopiedRows")
-      val numDeletedRows = if (numScannedRows.exists(_ >= 0) && numCopiedRows >= 0) {
-        numScannedRows.get - numCopiedRows
-      } else {
-        // One of the metrics couldn't be found, also mark numDeletedRows as not found.
-        -1L
-      }
-      metrics("numDeletedRows").set(numDeletedRows)
+  override protected def getDeleteSummary(): Option[DeleteSummaryImpl] = {
+    // DELETE ReplaceData plans filter out the deleted rows early in the plan, and they don't
+    // reach this node. We need to calculate this value as numScannedRows - numCopiedRows.
+    val numScannedRows = collectFirst(query) {
+      case b: BatchScanExec if b.table.isInstanceOf[RowLevelOperationTable] =>
+        getMetricValue(b.metrics, "numOutputRows")
+    }
+    val numCopiedRows = getMetricValue(sparkMetrics, "numCopiedRows")
+    val numDeletedRows = if (numScannedRows.exists(_ >= 0) && numCopiedRows >= 0) {
+      numScannedRows.get - numCopiedRows
+    } else {
+      // One of the metrics couldn't be found, also mark numDeletedRows as not found.
+      -1L
     }
-    super.getWriteSummary(query)
+
+    // SQLMetric.set is a no-op if value is -1, leaving the metric in its invalid state.
+    sparkMetrics("numDeletedRows").set(numDeletedRows)
+    super.getDeleteSummary().map(_.copy(numDeletedRows = numDeletedRows))
   }
 }
 
@@ -364,16 +425,19 @@ case class WriteDeltaExec(
     refreshCache: () => Unit,
     projections: WriteDeltaProjections,
     write: DeltaWrite,
-    rowLevelCommand: RowLevelOperation.Command) extends RowLevelWriteExec {
+    rowLevelCommand: RowLevelOperation.Command,
+    tableName: String,
+    transaction: Option[Transaction] = None) extends RowLevelWriteExec {
 
   override lazy val writingTask: WritingSparkTask[_] = {
     if (projections.metadataProjection.isDefined) {
-      DeltaWithMetadataWritingSparkTask(projections, operationMetrics)
+      DeltaWithMetadataWritingSparkTask(projections, sparkMetrics)
     } else {
-      DeltaWritingSparkTask(projections, operationMetrics)
+      DeltaWritingSparkTask(projections, sparkMetrics)
     }
   }
 
+  override def withTransaction(txn: Option[Transaction]): WriteDeltaExec = copy(transaction = txn)
   override protected def withNewChildInternal(newChild: SparkPlan): WriteDeltaExec = {
     copy(query = newChild)
   }
@@ -383,16 +447,20 @@ case class WriteToDataSourceV2Exec(
     batchWrite: BatchWrite,
     refreshCache: () => Unit,
     query: SparkPlan,
-    writeMetrics: Seq[CustomMetric]) extends V2TableWriteExec {
+    writeMetrics: Seq[CustomMetric],
+    transaction: Option[Transaction] = None) extends V2TableWriteExec with TransactionalExec {
+
+  override def withTransaction(txn: Option[Transaction]): WriteToDataSourceV2Exec =
+    copy(transaction = txn)
 
   override def stringArgs: Iterator[Any] = Iterator(batchWrite, query)
 
-  override val customMetrics: Map[String, SQLMetric] = writeMetrics.map { customMetric =>
-    customMetric.name() -> SQLMetrics.createV2CustomMetric(sparkContext, customMetric)
-  }.toMap
+  override lazy val customMetrics: Map[String, SQLMetric] =
+    createCustomMetrics(writeMetrics.toArray)
 
   override protected def run(): Seq[InternalRow] = {
     val writtenRows = writeWithV2(batchWrite)
+    transaction.foreach(TransactionUtils.commit)
     refreshCache()
     writtenRows
   }
@@ -401,38 +469,37 @@ case class WriteToDataSourceV2Exec(
     copy(query = newChild)
 }
 
-trait V2ExistingTableWriteExec extends V2TableWriteExec {
+/**
+ * Trait for physical plan nodes that write to an existing table as part of a transaction.
+ * The [[transaction]] is injected post-planning by [[QueryExecution]].
+ */
+trait TransactionalExec extends SparkPlan {
+  def transaction: Option[Transaction]
+  def withTransaction(txn: Option[Transaction]): SparkPlan
+}
+
+trait V2ExistingTableWriteExec extends V2TableWriteExec with TransactionalExec {
   def refreshCache: () => Unit
   def write: Write
+  def tableName: String
 
   override def stringArgs: Iterator[Any] = Iterator(query, write)
 
-  override val customMetrics: Map[String, SQLMetric] =
-    write.supportedCustomMetrics().map { customMetric =>
-      customMetric.name() -> SQLMetrics.createV2CustomMetric(sparkContext, customMetric)
-    }.toMap
+  override def nodeName: String = s"${super.nodeName} $tableName"
+
+  override lazy val customMetrics: Map[String, SQLMetric] =
+    createCustomMetrics(write.supportedCustomMetrics())
 
   override protected def run(): Seq[InternalRow] = {
     val writtenRows = try {
       writeWithV2(write.toBatch)
     } finally {
-      postDriverMetrics()
+      postDriverMetrics(write.reportDriverMetrics())
     }
+    transaction.foreach(TransactionUtils.commit)
     refreshCache()
     writtenRows
   }
-
-  protected def postDriverMetrics(): Unit = {
-    val driveSQLMetrics = write.reportDriverMetrics().map(customTaskMetric => {
-      val metric = metrics(customTaskMetric.name())
-      metric.set(customTaskMetric.value())
-      metric
-    })
-
-    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
-    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId,
-      driveSQLMetrics.toImmutableArraySeq)
-  }
 }
 
 /**
@@ -441,56 +508,82 @@ trait V2ExistingTableWriteExec extends V2TableWriteExec {
 trait RowLevelWriteExec extends V2ExistingTableWriteExec {
   def rowLevelCommand: RowLevelOperation.Command
 
-  override lazy val operationMetrics: Map[String, SQLMetric] = rowLevelCommand match {
-    case UPDATE =>
-      Map(
-        "numUpdatedRows" -> SQLMetrics.createMetric(sparkContext, "number of updated rows"),
-        "numCopiedRows" -> SQLMetrics.createMetric(sparkContext, "number of copied rows"))
-    case DELETE =>
-      Map(
-        "numDeletedRows" -> SQLMetrics.createMetric(sparkContext, "number of deleted rows"),
-        "numCopiedRows" -> SQLMetrics.createMetric(sparkContext, "number of copied rows"))
-    case _ => Map.empty
-  }
+  override protected lazy val sparkMetrics: Map[String, SQLMetric] = super.sparkMetrics ++ (
+    rowLevelCommand match {
+      case UPDATE =>
+        Map(
+          "numUpdatedRows" ->
+            SQLLastAttemptMetrics.createMetric(sparkContext, "number of updated rows"),
+          "numCopiedRows" ->
+            SQLLastAttemptMetrics.createMetric(sparkContext, "number of copied rows"))
+      case DELETE =>
+        Map(
+          "numDeletedRows" ->
+            SQLLastAttemptMetrics.createMetric(sparkContext, "number of deleted rows"),
+          "numCopiedRows" ->
+            SQLLastAttemptMetrics.createMetric(sparkContext, "number of copied rows"))
+      case _ => Map.empty
+    })
 
   /**
-   * Returns the value of the named metric, or -1 if the metric is not found.
+   * Returns the value of the named metric, or -1 if the metric is not found. For
+   * [[SQLLastAttemptMetric]] values, prefers the last-attempt value so the result is stable across
+   * stage retries; falls back to the regular accumulator value if the last-attempt value is
+   * unavailable (e.g. the accumulator bailed out).
    */
   protected def getMetricValue(metrics: Map[String, SQLMetric], name: String): Long = {
-    metrics.get(name).map(_.value).getOrElse(-1L)
+    metrics.get(name).map {
+      case slam: SQLLastAttemptMetric =>
+        slam.lastAttemptValueForHighestRDDId().getOrElse(slam.value)
+      case m => m.value
+    }.getOrElse(-1L)
   }
 
-  override protected def getWriteSummary(query: SparkPlan): Option[WriteSummary] = {
+  override protected def getWriteSummary(): Option[WriteSummary] = {
     rowLevelCommand match {
-      case MERGE =>
-        collectFirst(query) { case m: MergeRowsExec => m }.map { n =>
-          val metrics = n.metrics
-          MergeSummaryImpl(
-            getMetricValue(metrics, "numTargetRowsCopied"),
-            getMetricValue(metrics, "numTargetRowsDeleted"),
-            getMetricValue(metrics, "numTargetRowsUpdated"),
-            getMetricValue(metrics, "numTargetRowsInserted"),
-            getMetricValue(metrics, "numTargetRowsMatchedUpdated"),
-            getMetricValue(metrics, "numTargetRowsMatchedDeleted"),
-            getMetricValue(metrics, "numTargetRowsNotMatchedBySourceUpdated"),
-            getMetricValue(metrics, "numTargetRowsNotMatchedBySourceDeleted"))
-        }
-      case UPDATE =>
-        Some(UpdateSummaryImpl(
-          getMetricValue(operationMetrics, "numUpdatedRows"),
-          getMetricValue(operationMetrics, "numCopiedRows")))
-      case DELETE =>
-        Some(DeleteSummaryImpl(
-          getMetricValue(operationMetrics, "numDeletedRows"),
-          getMetricValue(operationMetrics, "numCopiedRows")))
+      case MERGE => getMergeSummary()
+      case UPDATE => getUpdateSummary()
+      case DELETE => getDeleteSummary()
     }
   }
+
+  protected def getMergeSummary(): Option[MergeSummaryImpl] = {
+    collectFirst(query) { case m: MergeRowsExec => m }.map { n =>
+      val metrics = n.metrics
+      MergeSummaryImpl(
+        getMetricValue(metrics, "numTargetRowsCopied"),
+        getMetricValue(metrics, "numTargetRowsDeleted"),
+        getMetricValue(metrics, "numTargetRowsUpdated"),
+        getMetricValue(metrics, "numTargetRowsInserted"),
+        getMetricValue(metrics, "numTargetRowsMatchedUpdated"),
+        getMetricValue(metrics, "numTargetRowsMatchedDeleted"),
+        getMetricValue(metrics, "numTargetRowsNotMatchedBySourceUpdated"),
+        getMetricValue(metrics, "numTargetRowsNotMatchedBySourceDeleted"))
+    }
+  }
+
+  protected def getUpdateSummary(): Option[UpdateSummaryImpl] = {
+    Some(UpdateSummaryImpl(
+      getMetricValue(sparkMetrics, "numUpdatedRows"),
+      getMetricValue(sparkMetrics, "numCopiedRows")))
+  }
+
+  protected def getDeleteSummary(): Option[DeleteSummaryImpl] = {
+    Some(DeleteSummaryImpl(
+      getMetricValue(sparkMetrics, "numDeletedRows"),
+      getMetricValue(sparkMetrics, "numCopiedRows")))
+  }
 }
 
 /**
  * The base physical plan for writing data into data source v2.
  */
-trait V2TableWriteExec extends V2CommandExec with UnaryExecNode with AdaptiveSparkPlanHelper {
+trait V2TableWriteExec
+  extends V2CommandExec
+  with UnaryExecNode
+  with AdaptiveSparkPlanHelper
+  with SupportsCustomDriverMetrics {
+
   def query: SparkPlan
   def writingTask: WritingSparkTask[_] = DataWritingSparkTask
 
@@ -499,10 +592,13 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode with AdaptiveSpa
   override def child: SparkPlan = query
   override def output: Seq[Attribute] = Nil
 
-  protected val customMetrics: Map[String, SQLMetric] = Map.empty
-  protected def operationMetrics: Map[String, SQLMetric] = Map.empty
+  override def customMetrics: Map[String, SQLMetric] = Map.empty
+
+  protected lazy val numOutputRowsMetric: SQLMetric =
+    SQLMetrics.createMetric(sparkContext, "number of output rows")
 
-  override lazy val metrics = customMetrics ++ operationMetrics
+  override protected def sparkMetrics: Map[String, SQLMetric] = Map(
+    "numOutputRows" -> numOutputRowsMetric)
 
   protected def writeWithV2(batchWrite: BatchWrite): Seq[InternalRow] = {
     val rdd: RDD[InternalRow] = {
@@ -521,7 +617,6 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode with AdaptiveSpa
       PhysicalWriteInfoImpl(rdd.getNumPartitions))
     val useCommitCoordinator = batchWrite.useCommitCoordinator
     val messages = new Array[WriterCommitMessage](rdd.partitions.length)
-    val totalNumRowsAccumulator = new LongAccumulator()
 
     logInfo(log"Start processing data source write support: " +
       log"${MDC(LogKeys.BATCH_WRITE, batchWrite)}. The input RDD has " +
@@ -539,19 +634,22 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode with AdaptiveSpa
         (index, result: DataWritingSparkTaskResult) => {
           val commitMessage = result.writerCommitMessage
           messages(index) = commitMessage
-          totalNumRowsAccumulator.add(result.numRows)
+          numOutputRowsMetric.add(result.numRows)
           batchWrite.onDataWriterCommit(commitMessage)
         }
       )
 
-      val writeSummary = getWriteSummary(query)
+      val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
+      SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, Seq(numOutputRowsMetric))
+
+      val writeSummary = getWriteSummary()
       logInfo(log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} is committing.")
       writeSummary match {
         case Some(summary) => batchWrite.commit(messages, summary)
         case None => batchWrite.commit(messages)
       }
       logInfo(log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} committed.")
-      commitProgress = Some(StreamWriterCommitProgress(totalNumRowsAccumulator.value))
+      commitProgress = Some(StreamWriterCommitProgress(numOutputRowsMetric.value))
     } catch {
       case cause: Throwable =>
         logError(
@@ -572,7 +670,7 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode with AdaptiveSpa
     Nil
   }
 
-  protected def getWriteSummary(query: SparkPlan): Option[WriteSummary] = None
+  protected def getWriteSummary(): Option[WriteSummary] = None
 }
 
 trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serializable {
@@ -674,7 +772,7 @@ trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serial
 case class DataAndMetadataWritingSparkTask(
     dataProj: ProjectingInternalRow,
     metadataProj: ProjectingInternalRow,
-    operationMetrics: Map[String, SQLMetric])
+    sparkMetrics: Map[String, SQLMetric])
   extends WritingSparkTask[DataWriter[InternalRow]] {
 
   override protected def write(
@@ -708,14 +806,14 @@ case class DataAndMetadataWritingSparkTask(
       }
     }
 
-    operationMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
-    operationMetrics.get("numCopiedRows").foreach(_.add(numCopiedRows))
+    sparkMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
+    sparkMetrics.get("numCopiedRows").foreach(_.add(numCopiedRows))
   }
 }
 
 case class DataWithProjectionWritingSparkTask(
     dataProj: ProjectingInternalRow,
-    operationMetrics: Map[String, SQLMetric])
+    sparkMetrics: Map[String, SQLMetric])
   extends WritingSparkTask[DataWriter[InternalRow]] {
 
   override protected def write(
@@ -747,8 +845,8 @@ case class DataWithProjectionWritingSparkTask(
       }
     }
 
-    operationMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
-    operationMetrics.get("numCopiedRows").foreach(_.add(numCopiedRows))
+    sparkMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
+    sparkMetrics.get("numCopiedRows").foreach(_.add(numCopiedRows))
   }
 }
 
@@ -761,7 +859,7 @@ object DataWritingSparkTask extends WritingSparkTask[DataWriter[InternalRow]] {
 
 case class DeltaWritingSparkTask(
     projections: WriteDeltaProjections,
-    operationMetrics: Map[String, SQLMetric])
+    sparkMetrics: Map[String, SQLMetric])
   extends WritingSparkTask[DeltaWriter[InternalRow]] {
 
   private lazy val rowProjection = projections.rowProjection.orNull
@@ -804,14 +902,14 @@ case class DeltaWritingSparkTask(
       }
     }
 
-    operationMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
-    operationMetrics.get("numDeletedRows").foreach(_.add(numDeletedRows))
+    sparkMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
+    sparkMetrics.get("numDeletedRows").foreach(_.add(numDeletedRows))
   }
 }
 
 case class DeltaWithMetadataWritingSparkTask(
     projections: WriteDeltaProjections,
-    operationMetrics: Map[String, SQLMetric])
+    sparkMetrics: Map[String, SQLMetric])
   extends WritingSparkTask[DeltaWriter[InternalRow]] {
 
   private lazy val rowProjection = projections.rowProjection.orNull
@@ -858,8 +956,8 @@ case class DeltaWithMetadataWritingSparkTask(
       }
     }
 
-    operationMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
-    operationMetrics.get("numDeletedRows").foreach(_.add(numDeletedRows))
+    sparkMetrics.get("numUpdatedRows").foreach(_.add(numUpdatedRows))
+    sparkMetrics.get("numDeletedRows").foreach(_.add(numDeletedRows))
   }
 }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala
index 75f3b04287aa4..510f3b525e97d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala
@@ -20,7 +20,6 @@ import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.connector.read.V1Scan
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation
-import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
 import org.apache.spark.sql.sources.{BaseRelation, TableScan}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.ArrayImplicits._
@@ -31,7 +30,7 @@ case class JDBCScan(
     pushedPredicates: Array[Predicate],
     pushedAggregateColumn: Array[String] = Array(),
     groupByColumns: Option[Array[String]],
-    tableSample: Option[TableSampleInfo],
+    tableSampleClause: Option[String],
     pushedLimit: Int,
     sortOrders: Array[String],
     pushedOffset: Int) extends V1Scan {
@@ -46,7 +45,7 @@ case class JDBCScan(
       pushedPredicates,
       pushedAggregateColumn,
       groupByColumns,
-      tableSample,
+      tableSampleClause,
       pushedLimit,
       sortOrders,
       pushedOffset).asInstanceOf[T]
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala
index b758ddd35e0d2..45d5f920b9be7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala
@@ -21,11 +21,12 @@ import scala.util.control.NonFatal
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.LogKeys.{JOIN_CONDITION, JOIN_TYPE, SCHEMA}
 import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils
 import org.apache.spark.sql.connector.expressions.{FieldReference, SortOrder}
 import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
 import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.connector.join.JoinType
-import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownAggregates, SupportsPushDownJoin, SupportsPushDownLimit, SupportsPushDownOffset, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters}
+import org.apache.spark.sql.connector.read.{SampleMethod, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownJoin, SupportsPushDownLimit, SupportsPushDownOffset, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters}
 import org.apache.spark.sql.execution.datasources.PartitioningUtils
 import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRDD, JDBCRelation}
 import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
@@ -57,7 +58,7 @@ case class JDBCScanBuilder(
 
   private var finalSchema = schema
 
-  private var tableSample: Option[TableSampleInfo] = None
+  private var tableSampleClause: Option[String] = None
 
   private var pushedLimit = 0
 
@@ -251,7 +252,7 @@ case class JDBCScanBuilder(
     pushedPredicate = Array.empty[Predicate]
     // Table sample is pushed down already as well, so we need to reset it to None to not push it
     // down again when join pushdown is triggered again on this JDBCScanBuilder.
-    tableSample = None
+    tableSampleClause = None
 
     true
   }
@@ -275,10 +276,13 @@ case class JDBCScanBuilder(
       lowerBound: Double,
       upperBound: Double,
       withReplacement: Boolean,
-      seed: Long): Boolean = {
-    if (jdbcOptions.pushDownTableSample && dialect.supportsTableSample) {
-      this.tableSample = Some(TableSampleInfo(lowerBound, upperBound, withReplacement, seed))
-      return true
+      seed: Long,
+      sampleMethod: SampleMethod): Boolean = {
+    if (jdbcOptions.pushDownTableSample) {
+      val sample = TableSampleInfo(
+        lowerBound, upperBound, withReplacement, seed, V2ExpressionUtils.toCatalyst(sampleMethod))
+      this.tableSampleClause = dialect.compileTableSample(sample)
+      return this.tableSampleClause.isDefined
     }
     false
   }
@@ -343,7 +347,7 @@ case class JDBCScanBuilder(
     // be used in sql string.
     JDBCScan(JDBCRelation(schema, parts, jdbcOptions, additionalMetrics)(session),
       finalSchema, pushedPredicate, pushedAggregateList, pushedGroupBys,
-      tableSample, pushedLimit, sortOrders, pushedOffset)
+      tableSampleClause, pushedLimit, sortOrders, pushedOffset)
   }
 
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCV1RelationFromV2Scan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCV1RelationFromV2Scan.scala
index feb33effae236..ef08d0ad94b9b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCV1RelationFromV2Scan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCV1RelationFromV2Scan.scala
@@ -20,7 +20,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SQLContext}
 import org.apache.spark.sql.connector.expressions.filter.Predicate
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation
-import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
 import org.apache.spark.sql.sources.{BaseRelation, TableScan}
 import org.apache.spark.sql.types.StructType
 
@@ -35,7 +34,7 @@ case class JDBCV1RelationFromV2Scan(
     pushedPredicates: Array[Predicate],
     pushedAggregateColumn: Array[String] = Array(),
     groupByColumns: Option[Array[String]],
-    tableSample: Option[TableSampleInfo],
+    tableSampleClause: Option[String],
     pushedLimit: Int,
     sortOrders: Array[String],
     pushedOffset: Int) extends BaseRelation with TableScan {
@@ -49,8 +48,8 @@ case class JDBCV1RelationFromV2Scan(
       pushedAggregateColumn
     }
 
-    relation.buildScan(columnList, prunedSchema, pushedPredicates, groupByColumns, tableSample,
-      pushedLimit, sortOrders, pushedOffset)
+    relation.buildScan(columnList, prunedSchema, pushedPredicates, groupByColumns,
+      tableSampleClause, pushedLimit, sortOrders, pushedOffset)
   }
 
   override def toString: String = "JDBC v1 Relation from v2 scan"
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala
index 41971e60f5737..9f8409efa360e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala
@@ -21,11 +21,10 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, DynamicPruningExpression, Expression, InSubquery, ListQuery, PredicateHelper, V2ExpressionUtils}
 import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
 import org.apache.spark.sql.catalyst.optimizer.RewritePredicateSubquery
-import org.apache.spark.sql.catalyst.planning.GroupBasedRowLevelOperation
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LogicalPlan}
+import org.apache.spark.sql.catalyst.planning.{DeltaBasedRowLevelOperation, GroupBasedRowLevelOperation}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LogicalPlan, RowLevelWrite}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.connector.read.SupportsRuntimeV2Filtering
-import org.apache.spark.sql.connector.write.RowLevelOperation.Command
 import org.apache.spark.sql.connector.write.RowLevelOperation.Command.{DELETE, MERGE, UPDATE}
 import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Implicits, DataSourceV2Relation, DataSourceV2ScanRelation, ExtractV2Scan}
 import org.apache.spark.util.ArrayImplicits._
@@ -34,13 +33,16 @@ import org.apache.spark.util.ArrayImplicits._
  * A rule that assigns a subquery to filter groups in row-level operations at runtime.
  *
  * Data skipping during job planning for row-level operations is limited to expressions that can be
- * converted to data source filters. Since not all expressions can be pushed down that way and
- * rewriting groups is expensive, Spark allows data sources to filter group at runtime.
- * If the primary scan in a group-based row-level operation supports runtime filtering, this rule
- * will inject a subquery to find all rows that match the condition so that data sources know
- * exactly which groups must be rewritten.
+ * converted to data source filters. Since not all expressions can be pushed down that way, Spark
+ * allows data sources to filter groups at runtime. If the primary scan in a row-level operation
+ * supports runtime filtering, this rule will inject a subquery to find all rows that match the
+ * condition so that data sources know exactly which groups have changes.
  *
- * Note this rule only applies to group-based row-level operations.
+ * Note that this rule is also beneficial for operations that deal with deltas of rows. Even if
+ * the data source is capable of handling specific changes, it is useful to first discard entire
+ * groups that are not modified. The cost of the runtime query is small as it only projects columns
+ * required to evaluate the row level operation condition. The main scan, on the other hand, must
+ * project all columns, meaning the cost of reading unaffected groups can dominate the runtime.
  */
 class RowLevelOperationRuntimeGroupFiltering(optimizeSubqueries: Rule[LogicalPlan])
   extends Rule[LogicalPlan] with PredicateHelper {
@@ -48,52 +50,61 @@ class RowLevelOperationRuntimeGroupFiltering(optimizeSubqueries: Rule[LogicalPla
   import DataSourceV2Implicits._
 
   override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
-    // apply special dynamic filtering only for group-based row-level operations
     case GroupBasedRowLevelOperation(replaceData, _, Some(cond),
-        ExtractV2Scan(scan: SupportsRuntimeV2Filtering))
-        if conf.runtimeRowLevelOperationGroupFilterEnabled && cond != TrueLiteral
-          && scan.filterAttributes().nonEmpty =>
-
-      // use reference equality on scan to find required scan relations
-      val newQuery = replaceData.query transformUp {
-        case r: DataSourceV2ScanRelation if r.scan eq scan =>
-          // use the original table instance that was loaded for this row-level operation
-          // in order to leverage a regular batch scan in the group filter query
-          val originalTable = r.relation.table.asRowLevelOperationTable.table
-          val relation = r.relation.copy(table = originalTable)
-          val tableAttrs = replaceData.table.output
-          val command = replaceData.operation.command
-          val matchingRowsPlan = buildMatchingRowsPlan(relation, cond, tableAttrs, command)
-
-          val filterAttrs = scan.filterAttributes.toImmutableArraySeq
-          val buildKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, matchingRowsPlan)
-          val pruningKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, r)
-          val dynamicPruningCond = buildDynamicPruningCond(matchingRowsPlan, buildKeys, pruningKeys)
-
-          Filter(dynamicPruningCond, r)
-      }
-
-      // optimize subqueries to rewrite them as joins and trigger job planning
-      replaceData.copy(query = optimizeSubqueries(newQuery))
+        ExtractV2Scan(scan: SupportsRuntimeV2Filtering)) if canInjectGroupFilters(cond, scan) =>
+      injectGroupFilters(replaceData, cond, scan)
+
+    case DeltaBasedRowLevelOperation(writeDelta, _, Some(cond),
+        ExtractV2Scan(scan: SupportsRuntimeV2Filtering)) if canInjectGroupFilters(cond, scan) =>
+      injectGroupFilters(writeDelta, cond, scan)
+  }
+
+  private def canInjectGroupFilters(
+      cond: Expression,
+      scan: SupportsRuntimeV2Filtering): Boolean = {
+    conf.runtimeRowLevelOperationGroupFilterEnabled &&
+      cond != TrueLiteral &&
+      scan.filterAttributes.nonEmpty
+  }
+
+  private def injectGroupFilters(
+      write: RowLevelWrite,
+      cond: Expression,
+      scan: SupportsRuntimeV2Filtering): LogicalPlan = {
+    // use reference equality on scan to find required scan relations
+    val newQuery = write.query transformUp {
+      case r: DataSourceV2ScanRelation if r.scan eq scan =>
+        // use the original table instance that was loaded for this row-level operation
+        // in order to leverage a regular batch scan in the group filter query
+        val originalTable = r.relation.table.asRowLevelOperationTable.table
+        val relation = r.relation.copy(table = originalTable)
+        val matchingRowsPlan = buildMatchingRowsPlan(write, relation, cond)
+        val filterAttrs = scan.filterAttributes.toImmutableArraySeq
+        val buildKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, matchingRowsPlan)
+        val pruningKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, r)
+        Filter(buildDynamicPruningCond(matchingRowsPlan, buildKeys, pruningKeys), r)
+    }
+    // optimize subqueries to rewrite them as joins and trigger job planning
+    write.withNewQuery(optimizeSubqueries(newQuery))
   }
 
   private def buildMatchingRowsPlan(
+      write: RowLevelWrite,
       relation: DataSourceV2Relation,
-      cond: Expression,
-      tableAttrs: Seq[Attribute],
-      command: Command): LogicalPlan = {
+      cond: Expression): LogicalPlan = {
 
-    val matchingRowsPlan = command match {
+    val matchingRowsPlan = write.operation.command match {
       case DELETE =>
         Filter(cond, relation)
 
       case UPDATE =>
-        // UPDATEs with subqueries are rewritten using UNION with two identical scan relations
+        // UPDATEs with subqueries can be rewritten using UNION with two identical scan relations
         // the analyzer assigns fresh expr IDs for one of them so that attributes don't collide
         // this rule assigns runtime filters to both scan relations (will be shared at runtime)
         // and must transform the runtime filter condition to use correct expr IDs for each relation
+        // note this only applies to group-based row-level operations (i.e. ReplaceData)
         // see RewriteUpdateTable for more details
-        val attrMap = buildTableToScanAttrMap(tableAttrs, relation.output)
+        val attrMap = buildTableToScanAttrMap(write.table.output, relation.output)
         val transformedCond = cond transform {
           case attr: AttributeReference if attrMap.contains(attr) => attrMap(attr)
         }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
index 1fff6c22c5ad8..62a3a977162aa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
@@ -273,7 +273,7 @@ case class EnsureRequirements(
             child match {
               case ShuffleExchangeExec(_, c, so, ps) =>
                 ShuffleExchangeExec(newPartitioning, c, so, ps)
-              case GroupPartitionsExec(c, _, _, _, _) => ShuffleExchangeExec(newPartitioning, c)
+              case gpe: GroupPartitionsExec => ShuffleExchangeExec(newPartitioning, gpe.child)
               case _ => ShuffleExchangeExec(newPartitioning, child)
             }
           }
@@ -286,13 +286,49 @@ case class EnsureRequirements(
       if (SortOrder.orderingSatisfies(child.outputOrdering, requiredOrdering)) {
         child
       } else {
-        SortExec(requiredOrdering, global = false, child = child)
+        // Before adding a SortExec, check whether a GroupPartitionsExec anywhere in the child
+        // subtree can self-satisfy via sorted merge. tryEnableSortedMerge generates all alternative
+        // plans where one or more GPEs have sorted merge enabled; we take the first one whose
+        // outputOrdering satisfies the requirement.
+        tryEnableSortedMerge(child)
+          .find(newChild => SortOrder.orderingSatisfies(newChild.outputOrdering, requiredOrdering))
+          .getOrElse(SortExec(requiredOrdering, global = false, child = child))
       }
     }
 
     children
   }
 
+  private def hasKeyedPartitioning(p: Partitioning): Boolean = p match {
+    case e: Expression => e.exists(_.isInstanceOf[KeyedPartitioning])
+    case _ => false
+  }
+
+  // Generates all alternative plans in which one or more GroupPartitionsExec nodes in the subtree
+  // have sorted-merge enabled (every possible combination). Returns a LazyList so the caller can
+  // stop evaluating once a satisfying alternative is found.
+  //
+  // Pruning: traversal stops at SortExec (which reorders data, making sorted merge below it
+  // pointless) and at any node whose outputPartitioning no longer carries a KeyedPartitioning.
+  // This is a good heuristic, though not strictly equivalent to "ordering no longer propagates":
+  // partition-key expressions are constant within each coalesced partition and therefore usually
+  // prefix outputOrdering. When a node prunes the KeyedPartitioning (e.g. a Project that drops
+  // partition keys), it also prunes that ordering prefix. Since Spark has no notion of constant
+  // expressions in SortOrder, dropping a prefix invalidates the rest of the ordering too -- so in
+  // practice the two are always pruned together.
+  //
+  // At each GPE the rule emits [original, sorted-merge-enabled] alternatives (or just [original]
+  // when sorted merge cannot be enabled). multiTransformDownWithPruning then builds the Cartesian
+  // product across all GPEs in the subtree, giving every combination.
+  private[exchange] def tryEnableSortedMerge(plan: SparkPlan): LazyList[SparkPlan] =
+    plan.multiTransformDownWithPruning(
+      p => !p.isInstanceOf[SortExec] &&
+        hasKeyedPartitioning(p.asInstanceOf[SparkPlan].outputPartitioning)) {
+      case gpe: GroupPartitionsExec =>
+        // Include the original so that peer GPEs are still independently considered.
+        gpe +: gpe.tryEnableSortedMerge().toSeq
+    }
+
   private def reorder(
       leftKeys: IndexedSeq[Expression],
       rightKeys: IndexedSeq[Expression],
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala
index 7dcbf3779b93d..7f757c651c560 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala
@@ -241,17 +241,22 @@ case class ShuffleExchangeExec(
    */
   @transient
   lazy val shuffleDependency : ShuffleDependency[Int, InternalRow, InternalRow] = {
-    val dep = ShuffleExchangeExec.prepareShuffleDependency(
-      inputRDD,
-      child.output,
-      outputPartitioning,
-      serializer,
-      writeMetrics)
-    metrics("numPartitions").set(dep.partitioner.numPartitions)
-    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
-    SQLMetrics.postDriverMetricUpdates(
-      sparkContext, executionId, metrics("numPartitions") :: Nil)
-    dep
+    // Wrap in the exchange's RDD scope so that any wrapper RDDs created during shuffle dependency
+    // preparation (e.g. by prepareShuffleDependency's mapPartitionsInternal calls) get this
+    // exchange's scope ID.
+    RDDOperationScope.withScope(sparkContext, nodeName, false, true, rddScopeId) {
+      val dep = ShuffleExchangeExec.prepareShuffleDependency(
+        inputRDD,
+        child.output,
+        outputPartitioning,
+        serializer,
+        writeMetrics)
+      metrics("numPartitions").set(dep.partitioner.numPartitions)
+      val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
+      SQLMetrics.postDriverMetricUpdates(
+        sparkContext, executionId, metrics("numPartitions") :: Nil)
+      dep
+    }
   }
 
   protected override def doExecute(): RDD[InternalRow] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
index 242185e803577..7712fdc9f6cc2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
@@ -825,7 +825,13 @@ private[execution] final class LongToUnsafeRowMap(
         throw QueryExecutionErrors.cannotBuildHashedRelationLargerThan8GError()
       }
       val newNumWords = math.max(neededNumWords, math.min(page.size() / 8 * 2, 1 << 30))
-      val newPage = allocatePage(newNumWords.toInt * 8)
+      // newNumWords is a Long up to 1 << 30. Multiplying by 8 must stay in Long
+      // arithmetic; `newNumWords.toInt * 8` (Int * Int) overflows to 0 at the
+      // upper bound, causing `allocatePage(0)` to fall back to the default page
+      // size while subsequent writes still advance `cursor` past the new page's
+      // end (heap corruption observed as a `forward_copy_longs` SEGV during
+      // BHJ build on aarch64).
+      val newPage = allocatePage(newNumWords * 8L)
       Platform.copyMemory(page.getBaseObject, page.getBaseOffset, newPage.getBaseObject,
         newPage.getBaseOffset, usedBytes)
       freePage(page)
@@ -966,10 +972,13 @@ private[execution] final class LongToUnsafeRowMap(
     readData(readBuffer, array.memoryBlock.getBaseObject, array.memoryBlock.getBaseOffset, length)
     val pageLength = readLong().toInt
     freePage(page)
-    page = allocatePage(pageLength * 8)
+    // Use Long multiplication: pageLength can be up to 1 << 30 (8 GiB page / 8),
+    // and `Int * Int` overflows at that bound, leading to a 0-byte allocatePage
+    // and a subsequent cursor that runs past the page's end.
+    page = allocatePage(pageLength * 8L)
     readData(readBuffer, page.getBaseObject, page.getBaseOffset, pageLength)
     // Restore cursor variable to make this map able to be serialized again on executors.
-    cursor = pageLength * 8 + page.getBaseOffset
+    cursor = pageLength * 8L + page.getBaseOffset
   }
 
   override def readExternal(in: ObjectInput): Unit = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptAccumulator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptAccumulator.scala
new file mode 100644
index 0000000000000..114d3974bb0ee
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptAccumulator.scala
@@ -0,0 +1,435 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.metric
+
+import scala.collection.mutable
+import scala.reflect.ClassTag
+import scala.util.control.NonFatal
+
+import org.apache.spark.SparkContext
+import org.apache.spark.internal.{LogEntry, Logging}
+import org.apache.spark.sql.Dataset
+import org.apache.spark.sql.execution.{BaseSubqueryExec, QueryExecution, SparkPlan, SubqueryAdaptiveBroadcastExec, SubqueryBroadcastExec, SubqueryExec, WholeStageCodegenExec}
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, BroadcastExchangeLike, ReusedExchangeExec, ShuffleExchangeExec, ShuffleExchangeLike}
+import org.apache.spark.util.{AccumulatorV2, LastAttemptAccumulator}
+
+/*
+ * SQLLastAttemptAccumulator is a LastAttemptAccumulator that allows tracking the last attempt
+ * updates that happened in the scope of execution of a plan created by a specific Dataset's
+ * QueryExecution.
+ *
+ * Tracking RDDs belonging to a Dataset execution.
+ * -----------------------------------------------
+ * Dataset executes executedPlan from its QueryExecution. Each SparkPlan node in the
+ * executedPlan saves the RDD with its execution (executeRDD or executeColumnarRDD). However,
+ * the root RDD of a Spark Stage that actually gets submitted and executed is not necessarily
+ * that RDD. It may be an ephemeral RDD created on the fly when submitting the job, e.g.:
+ * - for result stages, there may be additional transformations to format the results, like
+ *   apply an Encoder (e.g. turn InternalRows into Rows)
+ *   or transformations for Arrow
+ * - for map stages, there may be some additional transformations to prepare the shuffle
+ *   data in correct format.
+ * - operations like dataframe caching may wrap the plan results to format and write to cache.
+ * We therefore cannot track the metrics updates just by RDD id. However, each SparkPlan also
+ * creates an RDDOperationScope, and wraps the execution it submits by that scope.
+ * The completed Tasks should have RDDOperationScope of the SparkPlan that submitted the
+ * Stage. We need to extract the RDDOperationScopes from Dataset.queryExecution.executedPlan
+ * to track last attempt metric updates coming from that execution.
+ *
+ * Additionally, it is possible that the same queryExecution.executedPlan is reused. For
+ * example, when collect() is called multiple times on the same Dataset.
+ * - Part of the execution (e.g. the shuffles) should then be reused. Accumulator should still
+ *   keep their partial values associated to its RDDOperationScope, and return it for this
+ *   new attempt.
+ * - Some of the execution (e.g. the result stage) may be recomputed. Since the SparkPlan will
+ *   be the same, RDDOperationScope will be the same, and this should become a newer execution
+ *   of the same RDD, which should replace the previous one.
+ *
+ * AQE plan changes
+ * ----------------
+ * AQE re-optimizes LogicalPlan and creates new SparkPlan. If the new plan doesn't contain
+ * some of the QueryStages from the previous plan, they can be cancelled while they already
+ * started running and accumulated some metric results.
+ * If the metric is part of SparkPlan.metrics, then the newly created plan will have new
+ * metrics and the old metrics would have been discarded; so nothing needs to be tracked here.
+ * But if the metric is coming from outside, it can be reused by the new SparkPlan.
+ * A new plan will have a new RDD and a new RDDOperationScope, so by tracking these for the
+ * final AQE plan, only values from the final plan and execution should be aggregated.
+ *
+ * It can also happen that the new AQE plan reuses SparkPlan instances from the old plan,
+ * see CancelShuffleStageInBroadcastJoin. However, in that case, the old plan will be put
+ * under some new plan in newly submitted Stages. Since we only truly track the plans that
+ * submit Stages, these should be different and enough to disambiguate.
+ *
+ * Driver only updates
+ * -------------------
+ * The metric can be updated directly on the driver side, during the execution of catalyst
+ * optimizer. One example is [[ConvertToLocalRelation]] optimization rule, which constant folds
+ * pieces of the plan.
+ * Execution in this scope is tagged with [[QueryExecution.id]] using
+ * [[SparkContext.DATASET_QUERY_EXECUTION_ID_KEY]] property, and this metric is tracking
+ * the metric value separately for each QueryExecution.
+ * Like with LastAttemptAccumulator, the metric will bail out if it's updated both from the driver
+ * and from executor Tasks.
+ *
+ * Cached / Checkpointed plans
+ * ---------------------------
+ * If the metric was used inside a cached (df.cache, df.persist) or checkpointed (df.checkpoint,
+ * df.localCheckpoint) plan, which is then turned into an RDDScanExec or InMemoryTableScanExec
+ * in the Dataset's executedPlan, [[lastAttemptValueForDataset]] and
+ * [[lastAttemptValueForQueryExecution]] are declared undefined behavior. In this case,
+ * [[lastAttemptValueForHighestRDDId()]] should be used instead, which returns the value from
+ * the execution in which the plan was cached/checkpointed.
+ *
+ * The main issue is if the metric is in the top stage of the cached plan. When that plan is
+ * executed in some Dataset (as lazy execution), the metric will be executed in the scope of the
+ * stage that contains the InMemoryTableScanExec / RDDScanExec, which will be some parent of that
+ * plan, and not plan of the cached plan. So if the cached plan is then used in another Dataset,
+ * that Dataset will not have information about that parent.
+ * There could be some hacks done to fix it by recording in the InMemoryRelation the scopes in
+ * which it was materialized. There are also other issues, like that checkpoint throws away the
+ * plan, so it would also have to record the RDD scopes used during checkpointing. This gets
+ * further complicated if recomputations are involved, and are done in yet another scope.
+ * It was declared undefined behavior instead of pursuing this.
+ */
+
+/**
+ * A trait that can be mixed into a subclass of [[AccumulatorV2]] to track the "logical"
+ * value of the "last attempt" of the execution using the accumulator.
+ * In addition to what [[LastAttemptAccumulator]] does, it allows tracking the last attempt
+ * executed in the scope of a Dataset's QueryExecution, via
+ * [[lastAttemptValueForDataset]] and [[lastAttemptValueForQueryExecution]] methods.
+ */
+trait SQLLastAttemptAccumulator[IN, OUT, PARTIAL, DRIVER_ACC]
+    extends LastAttemptAccumulator[IN, OUT, PARTIAL] {
+  this: AccumulatorV2[IN, OUT] =>
+
+  /** Create a fresh accumulator to hold driver-side values for one QueryExecution. */
+  protected def newDriverQueryExecutionAcc(): DRIVER_ACC
+  /** Add a value to a driver-side per-QueryExecution accumulator. */
+  protected def addToDriverAcc(acc: DRIVER_ACC, value: IN): Unit
+  /** Set the value of a driver-side per-QueryExecution accumulator. */
+  protected def setDriverAcc(acc: DRIVER_ACC, value: OUT): Unit
+  /** Read the value of a driver-side per-QueryExecution accumulator. */
+  protected def driverAccValue(acc: DRIVER_ACC): OUT
+
+  @transient
+  private var lastAttemptDirectDriverQueryExecutionValues: mutable.Map[String, DRIVER_ACC] = _
+
+  override def initializeLastAttemptAccumulator()(implicit ct: ClassTag[PARTIAL]): Unit = try {
+    super.initializeLastAttemptAccumulator()(ct)
+    lastAttemptDirectDriverQueryExecutionValues = new mutable.HashMap[String, DRIVER_ACC]()
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in initializeLastAttemptAccumulator",
+        exception = Some(e))
+  }
+
+  override def resetLastAttemptAccumulator(): Unit = try {
+    super.resetLastAttemptAccumulator()
+    lastAttemptDirectDriverQueryExecutionValues = new mutable.HashMap[String, DRIVER_ACC]()
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in resetLastAttemptAccumulator",
+        exception = Some(e))
+  }
+
+  override protected def assertValid() = {
+    super.assertValid()
+    assert(lastAttemptDirectDriverQueryExecutionValues != null)
+  }
+
+  protected def getOrCreateDirectDriverQueryExecutionValue(queryExecutionId: String): DRIVER_ACC = {
+    lastAttemptDirectDriverQueryExecutionValues.synchronized {
+      if (!lastAttemptDirectDriverQueryExecutionValues.contains(queryExecutionId)) {
+        lastAttemptDirectDriverQueryExecutionValues.put(
+          queryExecutionId, newDriverQueryExecutionAcc())
+      }
+      lastAttemptDirectDriverQueryExecutionValues(queryExecutionId)
+    }
+  }
+
+  protected def getActiveDatasetQueryExecutionId: Option[String] = {
+    SparkContext
+      .getActive
+      .flatMap(sc => Option(sc.getLocalProperty(SparkContext.DATASET_QUERY_EXECUTION_ID_KEY)))
+  }
+
+  /**
+   * Check if the value is added on the driver side, not from within a task.
+   * If it is set in the scope of a Dataset's QueryExecution, associate it with that scope.
+   * This must be called from `add` methods of any AccumulatorV2 subclass supporting
+   * SQL last attempt metrics to set what the `value` of the metric is after the operation.
+   * This should be called there after [[setValueIfOnDriverSide]].
+   */
+  protected def addQueryExecutionValueIfOnDriverSide(value: IN): Unit = try {
+    // Note: setValueIfOnDriverSide will already make it invalid if there are also RDD updates.
+    if (isAtDriverSide && lastAttemptAccumulatorInitialized && !lastAttemptAccumulatorInvalid) {
+      // Direct update on the driver, not from within a task.
+      getActiveDatasetQueryExecutionId match {
+        case Some(qeId) =>
+          addToDriverAcc(getOrCreateDirectDriverQueryExecutionValue(qeId), value)
+        case None => // pass
+      }
+    }
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in addQueryExecutionValueIfOnDriverSide",
+        exception = Some(e))
+  }
+
+  /**
+   * Like [[addQueryExecutionValueIfOnDriverSide]], but for set operations.
+   */
+  protected def setQueryExecutionValueIfOnDriverSide(value: OUT): Unit = try {
+    if (isAtDriverSide && lastAttemptAccumulatorInitialized && !lastAttemptAccumulatorInvalid) {
+      getActiveDatasetQueryExecutionId match {
+        case Some(qeId) =>
+          setDriverAcc(getOrCreateDirectDriverQueryExecutionValue(qeId), value)
+        case None => // pass
+      }
+    }
+  } catch {
+    case NonFatal(e) =>
+      unexpectedLastAttemptMetricOperation(
+        invalidate = true,
+        reason = "Unexpected exception in setQueryExecutionValueIfOnDriverSide",
+        exception = Some(e))
+  }
+
+  override def logAccumulatorState: LogEntry = try {
+    val driverQEVals = Option(lastAttemptDirectDriverQueryExecutionValues)
+      .map(_.map { case (key, acc) => s"$key -> ${driverAccValue(acc)}" }.mkString("\n"))
+      .getOrElse("<not initialized>")
+    super.logAccumulatorState +
+      log"""
+         |Direct driver QE values:
+         |${MDC(logKeyAccumulatorState, driverQEVals)}
+         """.stripMargin
+  } catch {
+    case NonFatal(e) =>
+      logWarning(log"Unexpected exception in logAccumulatorState", e)
+      log"<Unexpected exception in logAccumulatorState>"
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from the last execution of this
+   * QueryExecution.
+   *
+   * @note The output of this method is undefined if this metric was used inside a part of the plan
+   *       which was either checkpointed (e.g. df.localCheckpoint(), df.checkpoint()) or cached
+   *       (e.g. df.cache(), df.persist()).
+   *       [[lastAttemptValueForHighestRDDId()]] should be used instead, which returns the
+   *       value from the execution in which the plan was cached/checkpointed.
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForQueryExecution(qe: QueryExecution): Option[OUT] = {
+    if (lastAttemptAccumulatorInvalid) return None
+    assertValid()
+    // If there was a driver set value defined in the scope of this QueryExecution, return that.
+    lastAttemptDirectDriverQueryExecutionValues.get(qe.id.toString) match {
+      case Some(acc) => return Some(driverAccValue(acc))
+      case None => // pass
+    }
+    // Otherwise, gather the RDD scopes from the plan and find metric updates from these scopes.
+    val scopes = SQLLastAttemptAccumulator.extractStageRDDScopes(qe.executedPlan)
+    scopes match {
+      case Left(bailOutReason) =>
+        unexpectedLastAttemptMetricOperation(
+          invalidate = false,
+          reason = s"Unable to extract RDD scopes from query execution plan: $bailOutReason")
+        None
+      case Right(scopes) =>
+        lastAttemptValueForRDDScopes(scopes)
+    }
+  }
+
+  /**
+   * Returns the last attempt value of this accumulator, aggregated from the last execution of this
+   * Dataset.
+   *
+   * @note The output of this method is undefined if this metric was used inside a part of the plan
+   *       which was either checkpointed (e.g. df.localCheckpoint(), df.checkpoint()) or cached
+   *       (e.g. df.cache(), df.persist()).
+   *       [[lastAttemptValueForHighestRDDId()]] should be used instead, which returns the
+   *       value from the execution in which the plan was cached/checkpointed.
+   *
+   * @return None if the last attempt value cannot be established, Some(value) otherwise.
+   */
+  def lastAttemptValueForDataset(ds: Dataset[_]): Option[OUT] = {
+    lastAttemptValueForQueryExecution(ds.queryExecution)
+  }
+
+  /** Visible for testing. */
+  def getDirectDriverQueryExecutionValue(qeId: String): Option[OUT] = {
+    lastAttemptDirectDriverQueryExecutionValues.get(qeId).map(driverAccValue)
+  }
+}
+
+object SQLLastAttemptAccumulator extends Logging {
+
+  private[metric] def extractStageRDDScopes(sparkPlan: SparkPlan): Either[String, Seq[String]] = {
+    var bailOutReason: Option[String] = None
+
+    // recurse, setting the bailOutReason on failure, or returning the list of scopes on success.
+    def recurse(sparkPlan: SparkPlan): Seq[String] = {
+      if (bailOutReason.isDefined) {
+        Nil
+      } else {
+        extractStageRDDScopes(sparkPlan) match {
+          case Left(reason) =>
+            bailOutReason = Some(reason)
+            Nil
+          case Right(scopes) => scopes
+        }
+      }
+    }
+
+    def scopeIds(sparkPlan: SparkPlan): Seq[String] = {
+      AdaptiveSparkPlanHelper.stripAQEPlan(sparkPlan) match {
+        case w: WholeStageCodegenExec =>
+          // WholeStageCodegenExec can fallback and execute the child plan without codegen instead,
+          // we don't know when this happens, so we need to account for both cases.
+          // It will never be both at the same time as this is a compilation time decision, so
+          // returning both won't result in duplicates.
+          Seq(w.rddScopeId, w.child.rddScopeId)
+        case p => Seq(p.rddScopeId)
+      }
+    }
+
+    // The root of the plan is submitted as a result stage.
+    val resultStageScopes = scopeIds(sparkPlan)
+
+    val stagesScopes = AdaptiveSparkPlanHelper.flatMap(sparkPlan) {
+      case _ if bailOutReason.isDefined => Nil
+
+      // broadcast exchange stage submitting nodes
+      case bl: BroadcastExchangeLike => bl match {
+        case b: BroadcastExchangeExec =>
+          // The job is submitted in scope of child of the broadcast exchange.
+          // ```
+          // val rs = child.executeCollectResult()
+          // ```
+          // <- executeCollectResult() is called on the child, and child executes it in its scope.
+          scopeIds(b.child)
+        case p =>
+          // Bail out if future unknown implementation is encountered.
+          bailOutReason = Some(s"Unsupported BroadcastExchangeLike: ${p.getClass.getName}")
+          Nil
+      }
+
+      // shuffle exchange stage submitting nodes
+      case sl: ShuffleExchangeLike => sl match {
+        // All shuffle exchange implementations create the ShuffledRowRDD / ShuffledBlockRDD
+        // with its own scope, and it will be executed in that scope.
+        case s: ShuffleExchangeExec => scopeIds(s)
+        case p =>
+          // Bail out if future unknown implementation is encountered.
+          bailOutReason = Some(s"Unsupported ShuffleExchangeLike: ${p.getClass.getName}")
+          Nil
+      }
+
+      // reused exchange
+      case r: ReusedExchangeExec =>
+        // Reused exchange is going to reuse stuff executed in the scope of its child,
+        // i.e. the exchange it reuses.
+        recurse(r.child)
+
+      case sl: BaseSubqueryExec => sl match {
+        case s: SubqueryExec =>
+          // ```
+          // val rows: Array[InternalRow] = if (maxNumRows.isDefined) {
+          //  child.executeTake(maxNumRows.get)
+          // } else {
+          //   child.executeCollect()
+          // }
+          // ```
+          // will launch stages in scope of child.
+          scopeIds(s.child)
+        case _: SubqueryBroadcastExec =>
+          // Used by DPP filter only, not part of main flow of query execution.
+          Nil
+        case _: SubqueryAdaptiveBroadcastExec =>
+          // Used by DPP filter only.
+          Nil
+        case p =>
+          // Bail out if future unknown implementation is encountered.
+          bailOutReason = Some(s"Unsupported BaseSubqueryExec: ${p.getClass.getName}")
+          Nil
+      }
+
+      /* Useful comments for posterity.
+      // cached table node
+      case _: InMemoryTableScanLike =>
+        // Do nothing for cached tables. There are many border cases where it wouldn't work.
+        // Some notes for posterity:
+        // For [[InMemoryTableScanExec]], we could recursed into the cachedPlan, but:
+        // - if the metric is in the top stage of that plan, then it would be executed in the scope
+        //   of the stage of whatever execution that InMemoryTableScanExec is part of when the
+        //   plan is cached. [[InMemoryTableScanExec]] is not a stage submitting node by itself, and
+        //   by itself it doesn't have visibility into the parent that submits the stage that
+        //   materializes the cache. If the current executedPlan is not the one that materializes,
+        //   then the metric would return 0 instead of the value from the cached execution. If the
+        //   current executedPlan is the one that materializes the cache, then it would be the
+        //   correct value.
+        // - if the metric is in a map stage of the cachedPlan, then it would be correctly
+        //   annotated with the scope of that stage, and it would work correctly.
+        //
+        // Since it's hard to achieve a consistent behavior here, we just do not support it.
+        Nil
+
+      // RDD node
+      case _: RDDScanExec =>
+        // Similar as with cached tables, do nothing with RDDs.
+        // This could be a plan coming from an execution of df.checkpoint().
+        // Since checkpointing cuts the references to the original plan, there is no way to descend
+        // into it to check attribution.
+        // We could try to make checkpoint collect and store the scopes of the original execution,
+        // but even then it would face similar inconsistencies as described above for cached plans.
+        // - if the metric is in the top stage of that plan, then if it was executed in the scope
+        //   of this execution, it would be attributed to the scope of the parent stage that is
+        //   consuming the checkpointed RDD, not to any scope of the original plan.
+        // - if the metric is in a map stage of the plan that was checkpointed, it requires that
+        //   checkpoint would track these stages and scopes.
+        //
+        // Since it's hard to achieve a consistent behavior here, we just do not support it.
+        Nil
+      */
+
+      case _ => Nil // only extract from nodes that submit stages
+    }
+
+    // also collect the plan scopes of all subqueries, which are executed "on the side".
+    val subqueriesScopes = AdaptiveSparkPlanHelper.flatMap(sparkPlan) { p =>
+      p.subqueries.flatMap(recurse)
+    }
+
+    if (bailOutReason.isDefined) {
+      Left(bailOutReason.get)
+    } else {
+      Right(resultStageScopes ++ stagesScopes ++ subqueriesScopes)
+    }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetric.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetric.scala
new file mode 100644
index 0000000000000..33326fb8e5bc4
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetric.scala
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.metric
+
+import org.apache.spark.SparkContext
+import org.apache.spark.util.AccumulatorV2
+
+class SQLLastAttemptMetric(
+    metricType: String,
+    initValue: Long = 0L)
+  extends SQLMetric(metricType, initValue)
+  with SQLLastAttemptAccumulator[Long, Long, Long, SQLMetric] {
+
+  override protected def partialMergeVal: Long = _value
+
+  override protected def partialMerge(value: Long): Unit = {
+    // For SQLLastAttemptMetric, this is just add to the underlying SQLMetric.
+    super.add(value)
+  }
+
+  override protected def isMergeable(other: AccumulatorV2[_, _]): Boolean = other match {
+    case o: SQLLastAttemptMetric => o.metricType == metricType
+    case _ => false
+  }
+
+  // SQLLastAttemptMetric is used internally to aggregate system metrics (counters) such as
+  // number of rows processed, and it should not store user data.
+  protected def accumulatorStoresUserData: Boolean = false
+
+  override protected def newDriverQueryExecutionAcc(): SQLMetric =
+    new SQLMetric(metricType, initValue)
+  override protected def addToDriverAcc(acc: SQLMetric, value: Long): Unit = acc.add(value)
+  override protected def setDriverAcc(acc: SQLMetric, value: Long): Unit = acc.set(value)
+  override protected def driverAccValue(acc: SQLMetric): Long = acc.value
+
+  override def copy(): SQLLastAttemptMetric = {
+    val newAcc = new SQLLastAttemptMetric(metricType, initValue)
+    newAcc._value = _value
+    newAcc
+  }
+
+  override def add(v: Long): Unit = {
+    super.add(v)
+    if (v >= 0) {
+      // set value of SQLMetric after the add.
+      setValueIfOnDriverSide(value)
+      addQueryExecutionValueIfOnDriverSide(v)
+    }
+  }
+
+  override def set(v: Long): Unit = {
+    super.set(v)
+    if (v >= 0) {
+      // set value of SQLMetric after the set.
+      setValueIfOnDriverSide(value)
+      setQueryExecutionValueIfOnDriverSide(value)
+    }
+  }
+
+}
+
+object SQLLastAttemptMetrics {
+  /**
+   * Create a metric to report the value aggregated from the last attempt of each task. These
+   * would be the values for the tasks that actually contributed to the final output of the
+   * execution.
+   */
+  def createMetric(sc: SparkContext, name: String): SQLLastAttemptMetric = {
+    val acc = new SQLLastAttemptMetric(SQLMetrics.SUM_METRIC)
+    acc.register(sc, name = SQLMetrics.metricsCache.get(name), countFailedValues = false)
+    acc.initializeLastAttemptAccumulator()
+    acc
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index 13f4d7926bea8..0523df282cda5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -45,7 +45,7 @@ class SQLMetric(
   // for SPARK-11013.
   assert(initValue <= 0)
   // _value will always be either initValue or non-negative.
-  private var _value = initValue
+  private[metric] var _value = initValue
 
   override def copy(): SQLMetric = {
     val newAcc = new SQLMetric(metricType, initValue)
@@ -110,7 +110,7 @@ class SQLMetric(
 }
 
 object SQLMetrics {
-  private val SUM_METRIC = "sum"
+  private[metric] val SUM_METRIC = "sum"
   private val SIZE_METRIC = "size"
   private val TIMING_METRIC = "timing"
   private val NS_TIMING_METRIC = "nsTiming"
@@ -120,7 +120,7 @@ object SQLMetrics {
 
   val cachedSQLAccumIdentifier = Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER)
 
-  private val metricsCache: LoadingCache[String, Option[String]] =
+  private[metric] val metricsCache: LoadingCache[String, Option[String]] =
     CacheBuilder.newBuilder().maximumSize(10000)
     .build(new CacheLoader[String, Option[String]] {
       override def load(name: String): Option[String] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala
index df108187c9f03..be36b368719ad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala
@@ -20,14 +20,12 @@ package org.apache.spark.sql.execution.python
 import java.io.DataOutputStream
 import java.util
 
-import org.apache.arrow.compression.{Lz4CompressionCodec, ZstdCompressionCodec}
 import org.apache.arrow.vector.{VectorSchemaRoot, VectorUnloader}
-import org.apache.arrow.vector.compression.{CompressionCodec, NoCompressionCodec}
 
-import org.apache.spark.{SparkEnv, SparkException, TaskContext}
+import org.apache.spark.{SparkEnv, TaskContext}
 import org.apache.spark.api.python.{BasePythonRunner, ChainedPythonFunctions, PythonWorker}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.arrow.ArrowWriterWrapper
+import org.apache.spark.sql.execution.arrow.{ArrowCompressionUtils, ArrowWriterWrapper}
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
@@ -90,21 +88,8 @@ class CoGroupedArrowPythonRunner(
 
   // Helper method to create VectorUnloader with compression
   private def createUnloader(root: VectorSchemaRoot): VectorUnloader = {
-    val codec = compressionCodecName match {
-      case "none" => NoCompressionCodec.INSTANCE
-      case "zstd" =>
-        val compressionLevel = SQLConf.get.arrowZstdCompressionLevel
-        val factory = CompressionCodec.Factory.INSTANCE
-        val codecType = new ZstdCompressionCodec(compressionLevel).getCodecType()
-        factory.createCodec(codecType)
-      case "lz4" =>
-        val factory = CompressionCodec.Factory.INSTANCE
-        val codecType = new Lz4CompressionCodec().getCodecType()
-        factory.createCodec(codecType)
-      case other =>
-        throw SparkException.internalError(
-          s"Unsupported Arrow compression codec: $other. Supported values: none, zstd, lz4")
-    }
+    val codec = ArrowCompressionUtils.createCompressionCodec(
+      compressionCodecName, SQLConf.get.arrowZstdCompressionLevel)
     new VectorUnloader(root, true, codec, true)
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
index 6a9b4978e27b0..adee0b2ea19a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData, STUtils}
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.ops.TypeApiOps
-import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal}
 
 object EvaluatePython {
 
@@ -97,9 +97,9 @@ object EvaluatePython {
 
       case (s: UTF8String, _: StringType) => s.toString
 
-      case (g: GeometryVal, gt: GeometryType) => STUtils.deserializeGeom(g, gt)
+      case (g: BinaryView, gt: GeometryType) => STUtils.deserializeGeom(g, gt)
 
-      case (g: GeographyVal, gt: GeographyType) => STUtils.deserializeGeog(g, gt)
+      case (g: BinaryView, gt: GeographyType) => STUtils.deserializeGeog(g, gt)
 
       case (bytes: Array[Byte], BinaryType) =>
         if (binaryAsBytes) {
@@ -245,7 +245,8 @@ object EvaluatePython {
         val geographySrid = s.get("srid").asInstanceOf[Int]
         g.assertSridAllowedForType(geographySrid)
         STUtils.stGeogFromWKB(
-          s.get("wkb").asInstanceOf[Array[Byte]])
+          s.get("wkb").asInstanceOf[Array[Byte]],
+          geographySrid)
     }
 
     case g: GeometryType => (obj: Any) => nullSafeConvert(obj) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowInput.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowInput.scala
index 2b200294803d8..b0c4576153828 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowInput.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowInput.scala
@@ -19,19 +19,18 @@ package org.apache.spark.sql.execution.python
 import java.io.DataOutputStream
 import java.nio.channels.Channels
 
-import org.apache.arrow.compression.{Lz4CompressionCodec, ZstdCompressionCodec}
 import org.apache.arrow.memory.BufferAllocator
 import org.apache.arrow.vector.{VectorSchemaRoot, VectorUnloader}
-import org.apache.arrow.vector.compression.{CompressionCodec, NoCompressionCodec}
+import org.apache.arrow.vector.compression.CompressionCodec
 import org.apache.arrow.vector.ipc.ArrowStreamWriter
 import org.apache.arrow.vector.ipc.WriteChannel
 import org.apache.arrow.vector.ipc.message.MessageSerializer
 
-import org.apache.spark.{SparkEnv, SparkException, TaskContext}
+import org.apache.spark.{SparkEnv, TaskContext}
 import org.apache.spark.api.python.{BasePythonRunner, PythonWorker}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.arrow
-import org.apache.spark.sql.execution.arrow.{ArrowWriter, ArrowWriterWrapper}
+import org.apache.spark.sql.execution.arrow.{ArrowCompressionUtils, ArrowWriter, ArrowWriterWrapper}
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
@@ -69,21 +68,8 @@ private[python] trait PythonArrowInput[IN] { self: BasePythonRunner[IN, _] =>
   }
 
   // Create compression codec based on config
-  protected def codec: CompressionCodec = SQLConf.get.arrowCompressionCodec match {
-    case "none" => NoCompressionCodec.INSTANCE
-    case "zstd" =>
-      val compressionLevel = SQLConf.get.arrowZstdCompressionLevel
-      val factory = CompressionCodec.Factory.INSTANCE
-      val codecType = new ZstdCompressionCodec(compressionLevel).getCodecType()
-      factory.createCodec(codecType)
-    case "lz4" =>
-      val factory = CompressionCodec.Factory.INSTANCE
-      val codecType = new Lz4CompressionCodec().getCodecType()
-      factory.createCodec(codecType)
-    case other =>
-      throw SparkException.internalError(
-        s"Unsupported Arrow compression codec: $other. Supported values: none, zstd, lz4")
-  }
+  protected def codec: CompressionCodec = ArrowCompressionUtils.createCompressionCodec(
+    SQLConf.get.arrowCompressionCodec, SQLConf.get.arrowZstdCompressionLevel)
 
   protected var writer: ArrowStreamWriter = _
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala
index 14cd06038b5af..4c7a8437a46fd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.CURRENT_LIKE
 import org.apache.spark.sql.classic.SparkSession
 import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, TableCapability}
 import org.apache.spark.sql.connector.distributions.UnspecifiedDistribution
+import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns
 import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, PartitionOffset, ReadLimit, SparkDataStream}
 import org.apache.spark.sql.connector.write.{RequiresDistributionAndOrdering, Write}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
@@ -92,7 +93,15 @@ class ContinuousExecution(
             log"from DataSourceV2 named '${MDC(STREAMING_DATA_SOURCE_NAME, sourceName)}' " +
             log"${MDC(STREAMING_DATA_SOURCE_DESCRIPTION, dsStr)}")
           // TODO: operator pushdown.
-          val scan = table.newScanBuilder(options).build()
+          // Passes the full output schema (not a pruned subset) so that connectors
+          // implementing SupportsMetadataColumns can include metadata columns in readSchema().
+          val scanBuilder = table.newScanBuilder(options)
+          scanBuilder match {
+            case r: SupportsPushDownRequiredColumns =>
+              r.pruneColumns(output.toStructType)
+            case _ =>
+          }
+          val scan = scanBuilder.build()
           val stream = scan.toContinuousStream(metadataPath)
           val relation = StreamingDataSourceV2Relation(
               table, output, catalog, identifier, options, metadataPath)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/IncrementalExecution.scala
index 1587fd4786a35..9fc72241e83b0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/IncrementalExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/IncrementalExecution.scala
@@ -143,6 +143,9 @@ class IncrementalExecution(
     }
   }
 
+  // Use `this` for explain so the already-open transaction and executedPlan are reused.
+  override protected def queryExecutionForExplain: QueryExecution = this
+
   private val allowMultipleStatefulOperators: Boolean =
     sparkSession.sessionState.conf.getConf(SQLConf.STATEFUL_OPERATOR_ALLOW_MULTIPLE)
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/MicroBatchExecution.scala
index 973af04e04307..84f0373ca5d48 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/MicroBatchExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/MicroBatchExecution.scala
@@ -26,9 +26,10 @@ import scala.util.control.NonFatal
 
 import org.apache.hadoop.fs.Path
 
-import org.apache.spark.{SparkIllegalArgumentException, SparkIllegalStateException}
+import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkIllegalStateException}
 import org.apache.spark.internal.LogKeys
 import org.apache.spark.internal.LogKeys._
+import org.apache.spark.sql.catalyst.analysis.V2TableReference
 import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp, FileSourceMetadataAttribute, LocalTimestamp}
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Deduplicate, DeduplicateWithinWatermark, Distinct, FlatMapGroupsInPandasWithState, FlatMapGroupsWithState, GlobalLimit, Join, LeafNode, LocalRelation, LogicalPlan, Project, StreamSourceAwareLogicalPlan, TransformWithState, TransformWithStateInPySpark}
@@ -37,7 +38,8 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.CURRENT_LIKE
 import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.classic.{Dataset, SparkSession}
 import org.apache.spark.sql.classic.ClassicConversions.castToImpl
-import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, TableCapability}
+import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, TableCapability, TransactionalCatalogPlugin}
+import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns
 import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset => OffsetV2, ReadLimit, SparkDataStream, SupportsAdmissionControl, SupportsRealTimeMode, SupportsTriggerAvailableNow}
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
@@ -46,7 +48,6 @@ import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, Real
 import org.apache.spark.sql.execution.streaming.{AvailableNowTrigger, Offset, OneTimeTrigger, ProcessingTimeTrigger, RealTimeModeAllowlist, RealTimeTrigger, Sink, Source, StreamingQueryPlanTraverseHelper}
 import org.apache.spark.sql.execution.streaming.checkpointing.{CheckpointFileManager, CommitMetadata, OffsetSeqBase, OffsetSeqLog, OffsetSeqMetadata, OffsetSeqMetadataV2}
 import org.apache.spark.sql.execution.streaming.operators.stateful.{StatefulOperatorStateInfo, StatefulOpStateStoreCheckpointInfo, StateStoreWriter}
-import org.apache.spark.sql.execution.streaming.runtime.AcceptsLatestSeenOffsetHandler
 import org.apache.spark.sql.execution.streaming.runtime.StreamingCheckpointConstants.{DIR_NAME_COMMITS, DIR_NAME_OFFSETS, DIR_NAME_STATE}
 import org.apache.spark.sql.execution.streaming.sources.{ForeachBatchSink, WriteToMicroBatchDataSource, WriteToMicroBatchDataSourceV1}
 import org.apache.spark.sql.execution.streaming.state.{OfflineStateRepartitionUtils, StateSchemaBroadcast, StateStoreErrors}
@@ -112,6 +113,22 @@ class MicroBatchExecution(
 
   override protected def sourceToIdMap: Map[SparkDataStream, String] = sourceIdMap.map(_.swap)
 
+  // Sink name for commit log support
+  // If sink evolution is enabled, use user-provided sinkName (or error if not provided)
+  // Otherwise, always use DEFAULT_SINK_NAME for backward compatibility
+  private val sinkName: String = {
+    if (sparkSession.sessionState.conf.enableStreamingSinkEvolution) {
+      plan.sinkName.getOrElse {
+        throw new SparkException(
+          errorClass = "STREAMING_QUERY_EVOLUTION_ERROR.UNNAMED_STREAMING_SINKS_WITH_ENFORCEMENT",
+          messageParameters = Map.empty,
+          cause = null)
+      }
+    } else {
+      MicroBatchExecution.DEFAULT_SINK_NAME
+    }
+  }
+
   @volatile protected[sql] var triggerExecutor: TriggerExecutor = _
 
   protected def getTrigger(): TriggerExecutor = {
@@ -167,6 +184,16 @@ class MicroBatchExecution(
   // into every subsequent batch's query plan.
   private val stateSchemaMetadatas = MutableMap[Long, StateSchemaBroadcast]()
 
+  /**
+   * Cached result of the first `offsetLog.getLatest()` call. Reused by both
+   * `logicalPlan` (to determine `enforceNamed`) and `initializeExecution` (to seed
+   * `latestStartedBatch`). This avoids a redundant `ListStatus` on the checkpoint's
+   * `offsets/` directory during stream startup. Safe to cache: between construction
+   * and `initializeExecution`, nothing else writes the offset log on the query thread.
+   */
+  private lazy val initialLatestOffsetSeq: Option[(Long, OffsetSeqBase)] =
+    offsetLog.getLatest()
+
   override lazy val logicalPlan: LogicalPlan = {
     assert(queryExecutionThread eq Thread.currentThread,
       "logicalPlan must be initialized in QueryExecutionThread " +
@@ -188,7 +215,7 @@ class MicroBatchExecution(
 
     // Read the source evolution enforcement from the last written offset log entry. If no entries
     // are found, use the session config value.
-    val enforceNamed = offsetLog.getLatest().flatMap { case (_, offsetSeq) =>
+    val enforceNamed = initialLatestOffsetSeq.flatMap { case (_, offsetSeq) =>
       offsetSeq.metadataOpt.flatMap { metadata =>
         OffsetSeqMetadata.readValueOpt(metadata, SQLConf.ENABLE_STREAMING_SOURCE_EVOLUTION)
           .map(_.toBoolean)
@@ -224,7 +251,15 @@ class MicroBatchExecution(
               log"from DataSourceV2 named '${MDC(LogKeys.STREAMING_DATA_SOURCE_NAME, srcName)}' " +
               log"${MDC(LogKeys.STREAMING_DATA_SOURCE_DESCRIPTION, dsStr)}")
             // TODO: operator pushdown.
-            val scan = table.newScanBuilder(options).build()
+            // Passes the full output schema (not a pruned subset) so that connectors
+            // implementing SupportsMetadataColumns can include metadata columns in readSchema().
+            val scanBuilder = table.newScanBuilder(options)
+            scanBuilder match {
+              case r: SupportsPushDownRequiredColumns =>
+                r.pruneColumns(output.toStructType)
+              case _ =>
+            }
+            val scan = scanBuilder.build()
             val stream = scan.toMicroBatchStream(metadataPath)
             val relation = StreamingDataSourceV2Relation(
                 table,
@@ -346,15 +381,26 @@ class MicroBatchExecution(
       )
     }
 
-    // TODO (SPARK-27484): we should add the writing node before the plan is analyzed.
     sink match {
       case s: SupportsWrite =>
-        val relationOpt = plan.catalogAndIdent.map {
-          case (catalog, ident) => DataSourceV2Relation.create(s, Some(catalog), Some(ident))
+        val relation = plan.catalogAndIdent match {
+          // For transactional catalog sinks, capture the baseline table metadata in a
+          // V2TableReference so that each micro-batch re-resolves the table through the
+          // transaction-aware catalog and fails if the table has been replaced or schema changed.
+          case Some((catalog: TransactionalCatalogPlugin, ident)) =>
+            // Re-resolve through the streaming session's catalog manager so the reference
+            // captures the streaming-session-specific catalog instance. TransactionalWrite
+            // detection and transaction begin must happen in the streaming session context.
+            val catalogManager = sparkSessionForStream.sessionState.catalogManager
+            val streamingCatalog = catalogManager.catalog(catalog.name)
+            val v2Relation = DataSourceV2Relation.create(s, Some(streamingCatalog), Some(ident))
+            V2TableReference.createForWriteTarget(v2Relation)
+          case Some((catalog, ident)) =>
+            DataSourceV2Relation.create(s, Some(catalog), Some(ident))
+          case None => DataSourceV2Relation.create(s, None, None)
         }
         WriteToMicroBatchDataSource(
-          relationOpt,
-          table = s,
+          relation,
           query = _logicalPlan,
           queryId = id.toString,
           extraOptions,
@@ -424,7 +470,7 @@ class MicroBatchExecution(
 
   private def initializeExecution(
       sparkSessionForStream: SparkSession): MicroBatchExecutionContext = {
-    var latestStartedBatch = offsetLog.getLatest()
+    var latestStartedBatch = initialLatestOffsetSeq
     val latestCommittedBatch = commitLog.getLatest()
 
     val lastCommittedBatchId = latestCommittedBatch match {
@@ -1461,6 +1507,12 @@ class MicroBatchExecution(
 
 object MicroBatchExecution {
   val BATCH_ID_KEY = "streaming.sql.batchId"
+
+  /**
+   * Default sink name used when sink evolution is disabled or no explicit name is provided.
+   * This maintains backward compatibility with existing streaming queries.
+   */
+  private[sql] val DEFAULT_SINK_NAME = "sink-0"
 }
 
 case class OffsetHolder(start: OffsetV2, end: Option[OffsetV2]) extends LeafNode {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/ResolveWriteToStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/ResolveWriteToStream.scala
index ff0d71d0f0759..0be430591dbd8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/ResolveWriteToStream.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/runtime/ResolveWriteToStream.scala
@@ -66,6 +66,7 @@ object ResolveWriteToStream extends Rule[LogicalPlan] {
 
       WriteToStream(
         s.userSpecifiedName.orNull,
+        s.userSpecifiedSinkName,
         resolvedCheckpointLocation,
         s.sink,
         s.outputMode,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala
index 0a33093dcbcea..5f8c53df08d2e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala
@@ -17,31 +17,48 @@
 
 package org.apache.spark.sql.execution.streaming.sources
 
+import org.apache.spark.sql.catalyst.analysis.NamedRelation
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
-import org.apache.spark.sql.connector.catalog.SupportsWrite
-import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode, V2StreamingWriteCommand}
 import org.apache.spark.sql.streaming.OutputMode
 
 /**
  * The logical plan for writing data to a micro-batch stream.
  *
- * Note that this logical plan does not have a corresponding physical plan, as it will be converted
- * to [[org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2 WriteToDataSourceV2]]
+ * Note that this logical plan does not have a corresponding physical plan, as it will be
+ * converted to
+ * [[org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2 WriteToDataSourceV2]]
  * with [[MicroBatchWrite]] before execution.
+ *
+ * When the write target is backed by a transactional catalog, it is created as a
+ * [[org.apache.spark.sql.catalyst.analysis.V2TableReference V2TableReference]].
+ * This is then resolved by ResolveRelations as a
+ * [[org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation DataSourceV2Relation]]
+ * for each micro-batch.
+ *
+ * For non-transactional catalogs, the write target is pre-resolved as a
+ * [[org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation DataSourceV2Relation]].
  */
 case class WriteToMicroBatchDataSource(
-    relation: Option[DataSourceV2Relation],
-    table: SupportsWrite,
+    relation: NamedRelation,
     query: LogicalPlan,
     queryId: String,
     writeOptions: Map[String, String],
     outputMode: OutputMode,
     batchId: Option[Long] = None)
-  extends UnaryNode {
+  extends UnaryNode with V2StreamingWriteCommand {
+
   override def child: LogicalPlan = query
   override def output: Seq[Attribute] = Nil
 
+  override def simpleString(maxFields: Int): String =
+    s"WriteToMicroBatchDataSource ${relation.name}"
+
+  override def table: NamedRelation = relation
+
+  override def withNewTable(newTable: NamedRelation): WriteToMicroBatchDataSource =
+    copy(relation = newTable)
+
   def withNewBatchId(batchId: Long): WriteToMicroBatchDataSource = {
     copy(batchId = Some(batchId))
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala
index 89e7a3058f3d0..bd479ffc6d2a8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala
@@ -2133,6 +2133,15 @@ class RocksDB(
       nativeStats.close()
       rocksDbOptions.close()
       dbLogger.close()
+      // In unbounded memory mode each RocksDB instance owns its LRUCache. Without explicit
+      // close() the native C++ cache object is only freed when the JVM GC finalizes the Java
+      // wrapper -- which rarely happens under low heap pressure. Closing explicitly here
+      // ensures native memory is reclaimed deterministically when the instance is released.
+      // In bounded mode the cache is a shared singleton managed by RocksDBMemoryManager
+      // and must not be closed here.
+      if (!conf.boundedMemoryUsage && lruCache != null) {
+        lruCache.close()
+      }
 
       var snapshot = snapshotsToUploadQueue.poll()
       while (snapshot != null) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
index d919a0d556d38..5801aba8c7693 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala
@@ -42,6 +42,8 @@ private[ui] class AllExecutionsPage(parent: SQLTab) extends WebUIPage("") {
         <div id="sql-executions-table">
           {spinner}
         </div>
+        <script src={UIUtils.prependBaseUri(
+          request, "/static/sql/sql-table-utils.js")}></script>
         <script src={UIUtils.prependBaseUri(
           request, "/static/sql/allexecutionspage.js")}></script>
       </span>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
index 3c8f0c1bec9d1..a556ee9339e01 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
@@ -46,53 +46,52 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
 
     val executionId = parameterExecutionId.toLong
     val content = sqlStore.execution(executionId).map { executionUIData =>
-      val currentTime = System.currentTimeMillis()
-      val duration = executionUIData.completionTime.map(_.getTime()).getOrElse(currentTime) -
-        executionUIData.submissionTime
+      val isSubExec = executionUIData.rootExecutionId != executionId
+      val subExecutions = if (groupSubExecutionEnabled) {
+        sqlStore.executionsList()
+          .filter(e => e.rootExecutionId == executionId && e.executionId != executionId)
+      } else {
+        Seq.empty
+      }
 
+      // Headers and row data are rendered client-side by DataTables from the
+      // column definitions in static/sql-execution-detail.js -- intentionally
+      // no <thead>/<tbody> in the Scala template below. Adding markup headers
+      // here causes DataTables to double-render and breaks layout (SPARK-56259).
       val summary =
-        <div>
-          <ul class="list-unstyled">
-            <li>
-              <strong>Submitted Time: </strong>{UIUtils.formatDate(executionUIData.submissionTime)}
-            </li>
-            <li>
-              <strong>Duration: </strong>{UIUtils.formatDuration(duration)}
-            </li>
-            {
-              Option(executionUIData.queryId).map { qId =>
-                <li>
-                  <strong>Query ID: </strong>{qId}
-                </li>
-              }.getOrElse(Seq.empty)
-            }
-            {
-              if (executionUIData.rootExecutionId != executionId) {
-                <li>
-                  <strong>Parent Execution: </strong>
-                  <a href={"?id=" + executionUIData.rootExecutionId}>
-                    {executionUIData.rootExecutionId}
-                  </a>
-                </li>
-              }
-            }
-            {
-              if (groupSubExecutionEnabled) {
-                val subExecutions = sqlStore.executionsList()
-                  .filter(e => e.rootExecutionId == executionId && e.executionId != executionId)
-                if (subExecutions.nonEmpty) {
-                  <li>
-                    <strong>Sub Executions: </strong>
-                    {
-                      subExecutions.map { e =>
-                        <a href={"?id=" + e.executionId}>{e.executionId}</a><span>&nbsp;</span>
+        <div class="mb-3">
+          <table id="sql-execution-table" class="table table-striped compact cell-border"
+                 style="width:100%" data-execution-id={executionId.toString}>
+          </table>
+          {
+            if (isSubExec || subExecutions.nonEmpty) {
+              <ul class="list-unstyled small text-muted mb-2">
+                {
+                  if (isSubExec) {
+                    <li>
+                      <strong>Parent Execution: </strong>
+                      <a href={"?id=" + executionUIData.rootExecutionId}>
+                        {executionUIData.rootExecutionId}
+                      </a>
+                    </li>
+                  }
+                }
+                {
+                  if (subExecutions.nonEmpty) {
+                    <li>
+                      <strong>Sub Executions: </strong>
+                      {
+                        subExecutions.map { e =>
+                          <a href={"?id=" + e.executionId}>{e.executionId}</a>
+                          <span>&nbsp;</span>
+                        }
                       }
-                    }
-                  </li>
+                    </li>
+                  }
                 }
-              }
+              </ul>
             }
-          </ul>
+          }
           <div id="plan-viz-download-btn-container">
             <select id="plan-viz-format-select">
               <option value="svg">SVG</option>
@@ -109,6 +108,10 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
                     type="button" title="Copy shareable link to this execution">
               &#x1f517; Copy Link</button>
           </div>
+          <script src={UIUtils.prependBaseUri(
+            request, "/static/sql/sql-table-utils.js")}></script>
+          <script src={UIUtils.prependBaseUri(
+            request, "/static/sql/executionpage.js")}></script>
         </div>
 
       val metrics = sqlStore.executionMetrics(executionId)
@@ -128,7 +131,8 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
     }
 
     UIUtils.headerSparkPage(
-      request, s"Details for Query $executionId", content, parent, useTimeline = true)
+      request, s"Details for Query $executionId", content, parent,
+      useDataTables = true, useTimeline = true)
   }
 
 
@@ -168,6 +172,39 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
               <input type="checkbox" id="detailed-labels-checkbox"></input>
               <span>Show metrics in graph nodes (detailed mode)</span>
             </div>
+            <div id="plan-viz-zoom-toolbar" class="plan-viz-zoom-toolbar">
+              <div id="plan-viz-search-collapsed" class="btn-group btn-group-sm me-2"
+                   role="group" aria-label="Search">
+                <button id="plan-viz-search-toggle" type="button"
+                        class="btn btn-light border" title="Find node (/)">&#x1f50d;</button>
+              </div>
+              <div id="plan-viz-search-expanded" class="input-group input-group-sm me-2 d-none"
+                   role="group" aria-label="Search">
+                <input id="plan-viz-search-input" type="search" autocomplete="off"
+                       class="form-control form-control-sm border"
+                       placeholder="Find node..." aria-label="Find node"/>
+                <span id="plan-viz-search-count" class="input-group-text bg-light"></span>
+                <button id="plan-viz-search-prev" type="button"
+                        class="btn btn-light border"
+                        title="Previous match (Shift+Enter)">&#x2191;</button>
+                <button id="plan-viz-search-next" type="button"
+                        class="btn btn-light border"
+                        title="Next match (Enter)">&#x2193;</button>
+                <button id="plan-viz-search-close" type="button"
+                        class="btn btn-light border"
+                        title="Close search (Esc)">&times;</button>
+              </div>
+              <div class="btn-group btn-group-sm" role="group" aria-label="Zoom controls">
+                <button id="plan-viz-zoom-out" type="button"
+                        class="btn btn-light border" title="Zoom out (-)">&#x2212;</button>
+                <button id="plan-viz-zoom-reset" type="button"
+                        class="btn btn-light border" title="Reset zoom to fit (0)">
+                  <span id="plan-viz-zoom-level">100%</span>
+                </button>
+                <button id="plan-viz-zoom-in" type="button"
+                        class="btn btn-light border" title="Zoom in (+)">&#x2b;</button>
+              </div>
+            </div>
           </div>
         </div>
         <div id="plan-viz-details-col" class="col-4 d-none">
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala
index 63934944c21a3..4e87d8a8a49e2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala
@@ -181,7 +181,27 @@ private[window] final class AggregateProcessor(
   }
 
   /** Evaluate buffer. */
-  def evaluate(target: InternalRow): Unit = {
-    evaluateProjection.target(target)(buffer)
+  def evaluate(target: InternalRow): Unit = evaluate(buffer, target)
+
+  /**
+   * Evaluate using an arbitrary `source` buffer (e.g. a segment-tree query
+   * result) instead of the internal one.
+   *
+   * '''Contract''': `source` must share this processor's internal
+   * `aggBufferAttributes` layout -- same field count, same dataTypes, in the
+   * same order. The segment-tree path enforces this upstream in
+   * `WindowEvaluatorFactoryBase.eligibleForSegTree`, which restricts eligible
+   * functions to `WindowSegmentTree.EligibleAggregates` (an explicit allowlist
+   * of `DeclarativeAggregate` classes). The same `functions: Array[DeclarativeAggregate]`
+   * drives both this processor's `bufferSchema` and `WindowSegmentTree`'s
+   * internal buffer schema. The contract is invisible at the call site and
+   * easy to break from either end; the cheap field-count `assert` below
+   * surfaces drift loudly instead of producing silently garbled output.
+   */
+  private[window] def evaluate(source: InternalRow, target: InternalRow): Unit = {
+    assert(source.numFields == bufferSchema.length,
+      s"source buffer has ${source.numFields} fields, " +
+        s"expected ${bufferSchema.length} to match aggBufferAttributes layout")
+    evaluateProjection.target(target)(source)
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowFunctionFrame.scala
new file mode 100644
index 0000000000000..51648e31e3498
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowFunctionFrame.scala
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.apache.spark.TaskContext
+import org.apache.spark.memory.TaskMemoryManager
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, FrameType, MutableProjection, RangeFrame, RowFrame, UnsafeRow}
+import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
+import org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray
+import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Moving-frame window function frame backed by [[WindowSegmentTree]]. Produces
+ * the same outputs as [[SlidingWindowFunctionFrame]] for RowFrame or
+ * single-column RangeFrame moving frames whose aggregates are all
+ * [[DeclarativeAggregate]] with no FILTER/DISTINCT. For partitions below
+ * `spark.sql.window.segmentTree.minPartitionRows`, delegates to a wrapped
+ * [[SlidingWindowFunctionFrame]]. Under RANGE, two forward-only cursors
+ * (`lowerIter` / `upperIter`) advance the bounds in O(n) total; the segtree
+ * answers `[lowerBound, upperBound)` in O(log n).
+ *
+ * @note Not thread-safe.
+ */
+private[window] final class SegmentTreeWindowFunctionFrame(
+    target: InternalRow,
+    processor: AggregateProcessor,
+    functions: Array[DeclarativeAggregate],
+    inputSchema: Seq[Attribute],
+    frameType: FrameType,
+    lbound: BoundOrdering,
+    ubound: BoundOrdering,
+    newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection,
+    conf: SQLConf,
+    maxCachedBlocks: Option[Int],
+    taskMemoryManager: TaskMemoryManager,
+    numSegmentTreeFrames: Option[SQLMetric] = None,
+    numSegmentTreeFallbackFrames: Option[SQLMetric] = None)
+  extends WindowFunctionFrame with AutoCloseable {
+
+  require(frameType == RowFrame || frameType == RangeFrame,
+    s"SegmentTreeWindowFunctionFrame supports RowFrame or RangeFrame, got $frameType")
+
+  private[this] var fallback: SlidingWindowFunctionFrame = _
+  private[this] var tree: WindowSegmentTree = _
+
+  /**
+   * Allocate a fresh fallback sliding-window frame. Called lazily from
+   * `prepare()` on the small-partition path. Factored out for testability
+   * (subclasses can inject a throwing fallback for prepare-failure tests).
+   */
+  private[window] def newFallback(): SlidingWindowFunctionFrame =
+    new SlidingWindowFunctionFrame(target, processor, lbound, ubound)
+
+  /** Test hook: whether the fallback frame has been lazily allocated. */
+  private[window] def fallbackAllocated: Boolean = fallback != null
+
+  // ---- RowFrame-only driver state ----
+  // `boundIter` advances `upperBound` one row at a time; the lower bound is
+  // pure index arithmetic under RowFrame.
+  private[this] var boundIter: Iterator[UnsafeRow] = _
+  private[this] var nextRow: UnsafeRow = _
+
+  // ---- RangeFrame-only driver state ----
+  // Two cursors over `rowArray`; `lowerRow` / `upperRow` hold the buffered
+  // head of each cursor, pre-fetched in `prepare` so
+  // `RangeBoundOrdering.compare` is never called with a null row on round 0.
+  //
+  // Spill-safety invariant: when `rowArray` spills, its iterator reuses a
+  // single `UnsafeRow` whose pointer is rebound on each `next()`. Tolerated
+  // here because the cursor is **read-before-advance**: each `writeRange`
+  // iteration reads `lowerRow` / `upperRow` for comparison before calling
+  // `getNextOrNull(...)`. DO NOT cache a historical row into a separate
+  // field without an explicit `.copy()`; the shared reusable UnsafeRow
+  // would silently mutate.
+  private[this] var lowerIter: Iterator[UnsafeRow] = _
+  private[this] var upperIter: Iterator[UnsafeRow] = _
+  private[this] var lowerRow: UnsafeRow = _
+  private[this] var upperRow: UnsafeRow = _
+
+  // Shared endpoints: monotone across `write()` calls within a partition.
+  private[this] var lowerBound: Int = 0
+  private[this] var upperBound: Int = 0
+
+  /**
+   * Runtime dispatch flag: when `true`, `write()`, `currentLowerBound()`, and
+   * `currentUpperBound()` delegate to the wrapped [[SlidingWindowFunctionFrame]]
+   * (small-partition path). Set by `prepare()` based on partition size vs.
+   * `spark.sql.window.segmentTree.minPartitionRows`.
+   */
+  private[window] var fallbackUsed: Boolean = false
+
+  // Register close() once per frame instance so the tree's block cache and
+  // any open row-array iterators are released when the task completes.
+  // Registering here (vs. inside the factory closure) avoids duplicate
+  // listeners when the factory is invoked multiple times per task.
+  {
+    val tc = TaskContext.get()
+    if (tc != null) {
+      tc.addTaskCompletionListener[Unit](_ => close())
+    }
+  }
+
+  override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
+    // INVARIANT: sole call site is `WindowPartitionEvaluator.fetchNextPartition`;
+    // `prepare` is called exactly once per (partition, frame). No mid-partition
+    // retry is supported.
+    //
+    // Ordering (matches `OffsetWindowFunctionFrameBase.prepare`): commit to a
+    // path first (allocate+prepare the fallback, or build the tree), and only
+    // then reset the opposite path's cached state. If the committing action
+    // throws, the frame stays in its previous (possibly stale but
+    // self-consistent) state; a thrown `prepare` is fatal for the task.
+    if (rows.length < conf.windowSegmentTreeMinPartitionRows) {
+      if (fallback == null) fallback = newFallback()
+      fallback.prepare(rows)  // may throw -- frame unchanged on failure
+      // Commit fallback path only after `prepare` succeeded.
+      resetSegtreeState()
+      fallbackUsed = true
+      numSegmentTreeFallbackFrames.foreach(_ += 1)
+      return
+    }
+    // Segtree path. Drop any retained fallback so its row-copy buffer from a
+    // prior small partition is GC-eligible; next small partition reallocates
+    // via `newFallback()`.
+    fallback = null
+    resetSegtreeState()
+    fallbackUsed = false
+    tree = new WindowSegmentTree(
+      functions,
+      inputSchema,
+      newMutableProjection,
+      fanout = conf.windowSegmentTreeFanout,
+      blockSize = conf.windowSegmentTreeBlockSize,
+      maxCachedBlocks = maxCachedBlocks,
+      taskMemoryManager = taskMemoryManager)
+    // Tree holds a reference to `rows` (caller-owned); no extra copy.
+    tree.build(rows)
+    // Count only on the successful segtree path: if `tree.build` throws,
+    // the counter is not bumped.
+    numSegmentTreeFrames.foreach(_ += 1)
+    frameType match {
+      case RowFrame =>
+        boundIter = rows.generateIterator()
+        nextRow = WindowFunctionFrame.getNextOrNull(boundIter)
+      case RangeFrame =>
+        lowerIter = rows.generateIterator()
+        upperIter = rows.generateIterator()
+        // Pre-seed cursor heads so `RangeBoundOrdering.compare` never
+        // dereferences null on round 0. Either may be null if `rows` is
+        // empty; the advance loops' `!= null` / `< upperBound` guards
+        // handle that.
+        lowerRow = WindowFunctionFrame.getNextOrNull(lowerIter)
+        upperRow = WindowFunctionFrame.getNextOrNull(upperIter)
+    }
+  }
+
+  /** Release prior segtree-path cached state (tree + row-cursors + bounds). */
+  private def resetSegtreeState(): Unit = {
+    if (tree != null) {
+      tree.close()
+      tree = null
+    }
+    closeIters()
+    nextRow = null
+    lowerRow = null
+    upperRow = null
+    lowerBound = 0
+    upperBound = 0
+  }
+
+  override def write(index: Int, current: InternalRow): Unit = {
+    if (fallbackUsed) {
+      fallback.write(index, current)
+      return
+    }
+    frameType match {
+      case RowFrame => writeRow(index, current)
+      case RangeFrame => writeRange(index, current)
+    }
+  }
+
+  // `writeRow`/`writeRange` mirror the `(lowerBound, upperBound)` monotone
+  // cursor invariant of `SlidingWindowFunctionFrame.write`, but run
+  // admit-then-drop (no buffer to maintain) instead of drop-then-admit.
+  // Any future fix to Sliding's boundary semantics must be mirrored here;
+  // equivalence is guarded by `SegmentTreeWindowFunctionSuite` flag-on/off
+  // tests (`checkRangeEquivalence`, `feature flag off ...`, fallback tests)
+  // which compare against the Sliding baseline.
+  private def writeRow(index: Int, current: InternalRow): Unit = {
+    var boundsChanged = index == 0
+
+    // admit loop: extend upperBound; if a candidate is already below the
+    // lower bound, advance lowerBound in lock-step to preserve invariant
+    // (0 <= lowerBound <= upperBound <= tree.size).
+    while (nextRow != null &&
+        ubound.compare(nextRow, upperBound, current, index) <= 0) {
+      if (lbound.compare(nextRow, lowerBound, current, index) < 0) {
+        lowerBound += 1
+      }
+      nextRow = WindowFunctionFrame.getNextOrNull(boundIter)
+      upperBound += 1
+      boundsChanged = true
+    }
+    // drop loop: advance lowerBound to the frame's left edge. RowFrame's
+    // `lbound.compare` is pure index arithmetic so the input row is unread;
+    // `lowerBound < upperBound` is the second defense.
+    while (lowerBound < upperBound &&
+        lbound.compare(null, lowerBound, current, index) < 0) {
+      lowerBound += 1
+      boundsChanged = true
+    }
+
+    if (boundsChanged) {
+      tree.queryInto(lowerBound, upperBound, processor, target)
+    }
+  }
+
+  private def writeRange(index: Int, current: InternalRow): Unit = {
+    var boundsChanged = index == 0
+
+    // admit loop (upper edge). `RangeBoundOrdering.compare` ignores its index
+    // arguments; we pass `upperBound` for API symmetry with RowBoundOrdering.
+    while (upperRow != null &&
+        ubound.compare(upperRow, upperBound, current, index) <= 0) {
+      upperBound += 1
+      upperRow = WindowFunctionFrame.getNextOrNull(upperIter)
+      boundsChanged = true
+    }
+
+    // drop loop (lower edge): strict `< 0`, guarded by
+    // `lowerBound < upperBound` so drop never overruns admit -- this also
+    // ensures `lowerRow` is non-null when reached (if the iterator
+    // exhausts, `lowerBound` has already caught up to `numRows >=
+    // upperBound`, so the guard stops the loop first).
+    while (lowerBound < upperBound &&
+        lbound.compare(lowerRow, lowerBound, current, index) < 0) {
+      lowerBound += 1
+      lowerRow = WindowFunctionFrame.getNextOrNull(lowerIter)
+      boundsChanged = true
+    }
+
+    if (boundsChanged) {
+      // Empty frame is handled inside `queryInto`, which initializes the
+      // processor and emits the aggregate's zero/identity value -- same as
+      // `SlidingWindowFunctionFrame` with an empty buffer.
+      tree.queryInto(lowerBound, upperBound, processor, target)
+    }
+  }
+
+  override def currentLowerBound(): Int =
+    if (fallbackUsed) fallback.currentLowerBound() else lowerBound
+
+  override def currentUpperBound(): Int =
+    if (fallbackUsed) fallback.currentUpperBound() else upperBound
+
+  /**
+   * Drop references to open rowArray iterators. Idempotent.
+   *
+   * Note: `ExternalAppendOnlyUnsafeRowArrayIterator.closeIfNeeded()` is
+   * `protected`, so we cannot invoke it. Spark's own `SlidingWindowFunctionFrame`
+   * also does not close its iterator; the backing `UnsafeExternalSorter` is
+   * released by the enclosing `WindowExec`'s `TaskCompletionListener`.
+   */
+  private def closeIters(): Unit = {
+    boundIter = null
+    lowerIter = null
+    upperIter = null
+  }
+
+  override def close(): Unit = {
+    if (tree != null) {
+      tree.close()
+      tree = null
+    }
+    closeIters()
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactory.scala
index c4b20d4b7c7d7..09c207ee603a3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactory.scala
@@ -30,9 +30,15 @@ class WindowEvaluatorFactory(
     val partitionSpec: Seq[Expression],
     val orderSpec: Seq[SortOrder],
     val childOutput: Seq[Attribute],
-    val spillSize: SQLMetric)
+    val spillSize: SQLMetric,
+    segmentTreeFrames: SQLMetric,
+    segmentTreeFallbackFrames: SQLMetric)
   extends PartitionEvaluatorFactory[InternalRow, InternalRow] with WindowEvaluatorFactoryBase {
 
+  override def numSegmentTreeFrames: Option[SQLMetric] = Some(segmentTreeFrames)
+  override def numSegmentTreeFallbackFrames: Option[SQLMetric] =
+    Some(segmentTreeFallbackFrames)
+
   override def createEvaluator(): PartitionEvaluator[InternalRow, InternalRow] = {
     new WindowPartitionEvaluator()
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactoryBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactoryBase.scala
index 9930c4a8963ab..2ae10ce9d711c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactoryBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowEvaluatorFactoryBase.scala
@@ -20,10 +20,10 @@ package org.apache.spark.sql.execution.window
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, TaskContext}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Add, AggregateWindowFunction, Ascending, Attribute, BoundReference, CurrentRow, DateAdd, DateAddYMInterval, DecimalAddNoOverflowCheck, Descending, Expression, ExtractANSIIntervalDays, FrameLessOffsetWindowFunction, FrameType, IdentityProjection, IntegerLiteral, MutableProjection, NamedExpression, OffsetWindowFunction, PythonFuncExpression, RangeFrame, RowFrame, RowOrdering, SortOrder, SpecifiedWindowFrame, TimestampAddInterval, TimestampAddYMInterval, UnaryMinus, UnboundedFollowing, UnboundedPreceding, UnsafeProjection, WindowExpression}
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, DeclarativeAggregate}
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{CalendarIntervalType, DateType, DayTimeIntervalType, DecimalType, IntegerType, TimestampNTZType, TimestampType, YearMonthIntervalType}
@@ -36,6 +36,14 @@ trait WindowEvaluatorFactoryBase {
   def orderSpec: Seq[SortOrder]
   def childOutput: Seq[Attribute]
   def spillSize: SQLMetric
+  /**
+   * Counters for [[SegmentTreeWindowFunctionFrame]] observability. Default
+   * `None` means the subclass does not integrate with the segment-tree frame
+   * path (e.g. [[org.apache.spark.sql.execution.python.ArrowWindowPythonEvaluatorFactory]]);
+   * only [[WindowEvaluatorFactory]] wires them.
+   */
+  def numSegmentTreeFrames: Option[SQLMetric] = None
+  def numSegmentTreeFallbackFrames: Option[SQLMetric] = None
 
   /**
    * Create the resulting projection.
@@ -191,13 +199,18 @@ trait WindowEvaluatorFactoryBase {
         // in a single Window physical node. Therefore, we can assume no SQL aggregation
         // functions if Pandas UDF exists. In the future, we might mix Pandas UDF and SQL
         // aggregation function in a single physical node.
+        val aggFilters: Array[Option[Expression]] = expressions.map {
+          case WindowExpression(ae: AggregateExpression, _) => ae.filter
+          case _ => None
+        }.toArray
+        // Keep as `def` (lazy / per-call): the FRAME_LESS_OFFSET /
+        // UNBOUNDED_OFFSET / UNBOUNDED_PRECEDING_OFFSET branches do not read
+        // `processor`. Eager `val` construction would invoke
+        // `AggregateProcessor.apply` on Lag / Lead / NthValue and throw
+        // `INTERNAL_ERROR: Unsupported aggregate function`.
         def processor = if (functions.exists(_.isInstanceOf[PythonFuncExpression])) {
           null
         } else {
-          val aggFilters = expressions.map {
-            case WindowExpression(ae: AggregateExpression, _) => ae.filter
-            case _ => None
-          }.toArray
           AggregateProcessor(
             functions,
             ordinal,
@@ -206,6 +219,8 @@ trait WindowEvaluatorFactoryBase {
               MutableProjection.create(expressions, schema),
             aggFilters)
         }
+        val conf = SQLConf.get
+        val blockSize = conf.windowSegmentTreeBlockSize
 
         // Create the factory to produce WindowFunctionFrame.
         val factory = key match {
@@ -275,12 +290,44 @@ trait WindowEvaluatorFactoryBase {
 
           // Moving Frame.
           case ("AGGREGATE", frameType, lower, upper, _) =>
-            target: InternalRow => {
-              new SlidingWindowFunctionFrame(
-                target,
-                processor,
-                createBoundOrdering(frameType, lower, timeZone),
-                createBoundOrdering(frameType, upper, timeZone))
+            if (eligibleForSegTree(functions, aggFilters, frameType, conf)) {
+              val segFns = functions.map(_.asInstanceOf[DeclarativeAggregate])
+              val cacheHint = estimateMaxCachedBlocks(lower, upper, frameType, blockSize)
+              target: InternalRow => {
+                // Task-completion listener registration lives inside the frame
+                // constructor (one per frame instance) to avoid duplicates when
+                // this closure fires multiple times per task. `TaskContext.get()`
+                // is only called at task-execution time, never at driver planning.
+                val tc = TaskContext.get()
+                if (tc == null) {
+                  throw SparkException.internalError(
+                    "WindowEvaluatorFactoryBase.segTreeFrameFactory requires " +
+                      "an active TaskContext")
+                }
+                val tmm = tc.taskMemoryManager()
+                new SegmentTreeWindowFunctionFrame(
+                  target,
+                  processor,
+                  segFns,
+                  childOutput,
+                  frameType,
+                  createBoundOrdering(frameType, lower, timeZone),
+                  createBoundOrdering(frameType, upper, timeZone),
+                  (e, s) => MutableProjection.create(e, s),
+                  conf,
+                  cacheHint,
+                  tmm,
+                  numSegmentTreeFrames,
+                  numSegmentTreeFallbackFrames)
+              }
+            } else {
+              target: InternalRow => {
+                new SlidingWindowFunctionFrame(
+                  target,
+                  processor,
+                  createBoundOrdering(frameType, lower, timeZone),
+                  createBoundOrdering(frameType, upper, timeZone))
+              }
             }
 
           case _ =>
@@ -295,4 +342,67 @@ trait WindowEvaluatorFactoryBase {
     }
   }
 
+  /**
+   * Segment-tree path eligibility. The tree relies on
+   * `DeclarativeAggregate.mergeExpressions`, which [[AggregateWindowFunction]]s
+   * (NthValue, NTile, Rank, RowNumber, NullIndex) refuse via
+   * `mergeUnsupportedByWindowFunctionError`: they extend DeclarativeAggregate
+   * but are NOT merge-capable. Normal aggregate window expressions reach this
+   * code as the inner DeclarativeAggregate unwrapped from
+   * [[AggregateExpression]] (see `windowFrameExpressionFactoryPairs.collect`).
+   *
+   * DISTINCT aggregate window expressions are already rejected earlier in
+   * analysis by `WindowResolution.checkWindowFunction`
+   * (error class `DISTINCT_WINDOW_FUNCTION_UNSUPPORTED`), so no explicit
+   * `isDistinct` gate is needed here.
+   */
+  private def eligibleForSegTree(
+      functions: Array[Expression],
+      filters: Array[Option[Expression]],
+      frameType: FrameType,
+      conf: SQLConf): Boolean = {
+    // RANGE accepted only for single-column order specs. Multi-column RANGE
+    // with non-zero offset is already rejected by `createBoundOrdering`, so
+    // gating here on `orderSpec.size == 1` matches the Sliding-path invariant.
+    val frameTypeOk = frameType match {
+      case RowFrame => true
+      case RangeFrame => orderSpec.size == 1
+      case _ => false
+    }
+    conf.windowSegmentTreeEnabled &&
+      frameTypeOk &&
+      filters.forall(_.isEmpty) &&
+      functions.forall(WindowSegmentTree.isEligible)
+  }
+
+  private def estimateMaxCachedBlocks(
+      lower: Expression,
+      upper: Expression,
+      frameType: FrameType,
+      blockSize: Int): Option[Int] = {
+    // Reached via the moving-frame branch after `eligibleForSegTree`. Under
+    // RANGE the frame width is data-dependent (defined by order-key distance,
+    // not row count), so no static width inference is possible; fall back to
+    // a default budget and rely on the runtime LRU + TMM spiller.
+    assert(frameType == RowFrame || frameType == RangeFrame,
+      s"estimateMaxCachedBlocks expects RowFrame or RangeFrame, got $frameType")
+    if (frameType == RangeFrame) {
+      return Some(8)
+    }
+    val w: Option[Int] = (lower, upper) match {
+      case (CurrentRow, CurrentRow) => Some(1)
+      case (IntegerLiteral(lo), IntegerLiteral(hi)) => Some(math.abs(hi - lo) + 1)
+      case (CurrentRow, IntegerLiteral(hi)) => Some(math.abs(hi) + 1)
+      case (IntegerLiteral(lo), CurrentRow) => Some(math.abs(lo) + 1)
+      case _ => None
+    }
+    // `ceil(W / blockSize)` is the minimum number of blocks a single frame can
+    // straddle; `+ 2` adds one block of slack at each end to cover the case
+    // where the frame's [lower, upper) interval is offset within its leftmost
+    // block (so the cursor temporarily holds the previous block as well) and
+    // the symmetric case at the right edge -- without this slack the LRU
+    // would thrash on the boundary blocks every time the cursor advances.
+    w.map(ww => math.ceil(ww.toDouble / blockSize).toInt + 2).orElse(Some(8))
+  }
+
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala
index 9ecd1c587a756..e80565c961399 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala
@@ -91,7 +91,12 @@ case class WindowExec(
     child: SparkPlan)
   extends WindowExecBase {
   override lazy val metrics: Map[String, SQLMetric] = Map(
-    "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size")
+    "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size"),
+    "numSegmentTreeFrames" ->
+      SQLMetrics.createMetric(sparkContext, "number of segment-tree frames prepared"),
+    "numSegmentTreeFallbackFrames" ->
+      SQLMetrics.createMetric(sparkContext,
+        "number of segment-tree fallback frames prepared")
   )
 
   protected override def doExecute(): RDD[InternalRow] = {
@@ -101,7 +106,9 @@ case class WindowExec(
         partitionSpec,
         orderSpec,
         child.output,
-        longMetric("spillSize"))
+        longMetric("spillSize"),
+        longMetric("numSegmentTreeFrames"),
+        longMetric("numSegmentTreeFallbackFrames"))
 
     // Start processing.
     if (conf.usePartitionEvaluator) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowSegmentTree.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowSegmentTree.scala
new file mode 100644
index 0000000000000..27a7736361341
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowSegmentTree.scala
@@ -0,0 +1,614 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import java.util.{LinkedHashMap => JLinkedHashMap, Map => JMap}
+
+import scala.collection.mutable
+
+import org.apache.spark.SparkException
+import org.apache.spark.memory.{MemoryConsumer, MemoryMode, TaskMemoryManager}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.{Average, Count, DeclarativeAggregate, Max, Min, StddevPop, StddevSamp, Sum, VariancePop, VarianceSamp}
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * Block-chunked segment tree for moving-frame window aggregates. Partitions are
+ * split into blocks of `blockSize` rows; each block has its own small segtree
+ * (fanout `F`, height `h`). Block roots stay resident; internal nodes are cached
+ * in an LRU keyed by block index. Queries cost O(log W).
+ *
+ * Memory accounting invariants:
+ *  - I1: `SegTreeSpiller.spill()` MUST NOT call `acquireMemory` on its own
+ *        consumer (would deadlock TMM's consumer-priority sort). All acquires
+ *        happen on the hot path ([[ensureBlockLevels]]).
+ *  - I2: `spill(_, trigger)` returns 0 when `trigger eq this` (self-trigger
+ *        short-circuit) to prevent re-entrant eviction.
+ *  - I3: LRU `removeEldestEntry` is disabled; eviction is driven explicitly
+ *        from [[ensureBlockLevels]] or [[SegTreeSpiller.spill]].
+ *  - I4: Every successful [[acquireBlockMemory]] is paired with exactly one
+ *        [[releaseBlockMemory]]. [[close]] is idempotent.
+ *  - I5: Per-block bytes are a conservative upper bound (full block, 16 B/field).
+ *  - I8: If `rowArray` already spilled to disk, `spill` returns 0 (rebuild
+ *        would O(blockStart)-scan the spill file).
+ *
+ * @note Instances are not thread-safe.
+ */
+private[window] class WindowSegmentTree(
+    functions: Array[DeclarativeAggregate],
+    inputSchema: Seq[Attribute],
+    newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection,
+    fanout: Int = WindowSegmentTree.DefaultFanout,
+    blockSize: Int = WindowSegmentTree.DefaultBlockSize,
+    maxCachedBlocks: Option[Int] = None,
+    taskMemoryManager: TaskMemoryManager = null)
+  extends AutoCloseable {
+
+  require(fanout >= 2, s"fanout must be >= 2, got $fanout")
+  require(blockSize >= 1, s"blockSize must be >= 1, got $blockSize")
+  require(functions.nonEmpty, "WindowSegmentTree requires at least one aggregate function")
+  maxCachedBlocks.foreach { n =>
+    require(n >= 1, s"maxCachedBlocks must be >= 1 when specified, got $n")
+  }
+  require(taskMemoryManager != null,
+    "WindowSegmentTree requires a non-null TaskMemoryManager; " +
+      "in tests use `new TaskMemoryManager(new TestMemoryManager(conf), 0)`")
+
+  // ---------- Schemas & projections ----------
+
+  private val bufferAttrs: Seq[AttributeReference] =
+    functions.flatMap(_.aggBufferAttributes).toImmutableArraySeq
+  private val rightAttrs: Seq[AttributeReference] =
+    functions.flatMap(_.inputAggBufferAttributes).toImmutableArraySeq
+  private val bufferDataTypes: IndexedSeq[DataType] =
+    bufferAttrs.map(_.dataType).toIndexedSeq
+
+  private val initialValues: Seq[Expression] = functions.flatMap(_.initialValues).toIndexedSeq
+  private val updateExpressions: Seq[Expression] =
+    functions.flatMap(_.updateExpressions).toIndexedSeq
+  private val mergeExpressions: Seq[Expression] =
+    functions.flatMap(_.mergeExpressions).toIndexedSeq
+
+  private[this] val initProj: MutableProjection = newMutableProjection(initialValues, Nil)
+  private[this] val updateProj: MutableProjection =
+    newMutableProjection(updateExpressions, bufferAttrs ++ inputSchema)
+  private[this] val mergeProj: MutableProjection =
+    newMutableProjection(mergeExpressions, bufferAttrs ++ rightAttrs)
+
+  private[this] val joinedRow = new JoinedRow()
+
+  // ---------- State ----------
+
+  private var numRows: Int = 0
+  private var numBlocks: Int = 0
+  private var rowArray: ExternalAppendOnlyUnsafeRowArray = _
+  private var closed: Boolean = false
+
+  /**
+   * Always-resident per-block root aggregates: `blockAggregates(i)` =
+   *  merged buffer over all rows in block i.
+   */
+  private var blockAggregates: Array[InternalRow] = Array.empty
+
+  /**
+   * Conservative byte width of one aggregate buffer row at 16 B/field:
+   *  primitive `MutableValue` is 8 B, boxed references and object headers
+   *  push the effective footprint higher. Tighter per-type sizing is out
+   *  of scope; TaskMemoryManager remains the hard backstop via spill / OOM.
+   */
+  private val bufferWidthBytes: Long = {
+    val bytesPerField = 16L
+    math.max(1L, bufferDataTypes.size.toLong * bytesPerField)
+  }
+
+  /**
+   * Number of aggregate-buffer slots cached per block (see I5).
+   *
+   *  Invariant: equals `sum over levels L of levels(L).length` for any block
+   *  built by [[buildBlockLevels]]: level 0 holds `blockSize` leaves and each
+   *  next level holds `ceil(prev / fanout)` parents until a single root
+   *  remains. For `blockSize == 1` this is 1 (single leaf, no parents).
+   */
+  private val cachedSlotsPerBlock: Long = {
+    var n = blockSize.toLong
+    var sum = n
+    while (n > 1L) {
+      n = (n + fanout - 1) / fanout
+      sum += n
+    }
+    sum
+  }
+
+  /**
+   * Bytes accounted per cached block (see I5). Conservative: assumes every
+   *  block is full; tail block leaves a small headroom.
+   */
+  private[this] val blockBytes: Long =
+    math.max(1L, cachedSlotsPerBlock * bufferWidthBytes)
+
+  /**
+   * `spans(L)` = number of leaves covered by a single node at level L. Depends
+   *  only on fanout + blockSize, so precomputed once.
+   */
+  private val spans: Array[Int] = {
+    val maxLevel = {
+      var lvl = 0
+      var span = 1L
+      while (span < blockSize) { span *= fanout; lvl += 1 }
+      lvl
+    }
+    val arr = new Array[Int](maxLevel + 1)
+    var s = 1L
+    var i = 0
+    while (i <= maxLevel) {
+      arr(i) = if (s > Int.MaxValue) Int.MaxValue else s.toInt
+      s *= fanout
+      i += 1
+    }
+    arr
+  }
+
+  /**
+   * LRU cache of per-block internal node arrays. Key = blockIdx;
+   *  value = `Array[Array[InternalRow]]` with levels(0..h). Auto-eviction
+   *  via `removeEldestEntry` is disabled (I3) -- driven explicitly from
+   *  [[ensureBlockLevels]] or [[SegTreeSpiller.spill]]. Each entry maps 1:1
+   *  to one [[acquireBlockMemory]] accounting. Callers should pass a W-aware
+   *  `maxCachedBlocks` like `ceil(W / blockSize) + 2`.
+   */
+  private val blockLevelsCache: JLinkedHashMap[Integer, Array[Array[InternalRow]]] =
+    new JLinkedHashMap[Integer, Array[Array[InternalRow]]](16, 0.75f, true) {
+      override def removeEldestEntry(
+          eldest: JMap.Entry[Integer, Array[Array[InternalRow]]]): Boolean = false
+    }
+
+  // ---------- Memory consumer ----------
+
+  /**
+   * Private MemoryConsumer tracking cached block levels under TMM. Heap-only
+   * (no Tungsten pages): uses [[MemoryConsumer.acquireMemory]] /
+   * [[MemoryConsumer.freeMemory]], which update the base class `used`
+   * AtomicLong so TMM's consumer-priority sort sees our pressure accurately.
+   *
+   * Hardcoded [[MemoryMode.ON_HEAP]] (not `tmm.getTungstenMemoryMode`): the
+   * cache holds plain JVM objects (`SpecificInternalRow` /
+   * `Array[Array[InternalRow]]`), never Tungsten pages. Under
+   * `spark.memory.offHeap.enabled=true`, enrolling as OFF_HEAP would let
+   * TMM pick us as a spill candidate for off-heap pressure; our `spill()`
+   * would then phantom-credit the off-heap pool without releasing any
+   * off-heap bytes, violating [[TaskMemoryManager#acquireExecutionMemory]]'s
+   * same-pool spill contract. Mirrors
+   * [[org.apache.spark.util.collection.Spillable]], which also hardcodes
+   * ON_HEAP for the same reason. Consequence under off-heap Tungsten: I8
+   * (below) degrades to a no-op because segtree and `rowArray` live in
+   * different pools -- a loss of optimization, not a correctness hazard.
+   *
+   * @note `spill()` MUST NOT call `acquireMemory` (see I1).
+   */
+  private final class SegTreeSpiller extends MemoryConsumer(
+      taskMemoryManager,
+      taskMemoryManager.pageSizeBytes(),
+      MemoryMode.ON_HEAP) {
+    override def spill(size: Long, trigger: MemoryConsumer): Long = {
+      // I2: self-trigger short-circuit (prevent re-entrant eviction).
+      if (trigger eq this) return 0L
+      // I8: rowArray already spilled -- evicting our cache is counter-productive
+      // (rebuild would O(blockStart)-scan the spill file). `spillSize > 0` is
+      // the available "has spilled" signal (UnsafeExternalSorter state is not
+      // public).
+      if (rowArray != null && rowArray.spillSize > 0) return 0L
+      evictUntil(size)
+    }
+  }
+
+  private[this] val spiller: SegTreeSpiller = new SegTreeSpiller
+
+  // ---------- Public API ----------
+
+  def size: Int = numRows
+
+  /**
+   * Build the tree against a caller-owned row array.
+   *
+   * Ownership: the tree holds a reference to `rows` for its lifetime but does
+   * NOT own it -- the caller (typically `WindowPartitionEvaluator.buffer`)
+   * manages `clear()` / lifetime at partition boundaries. `close()` drops
+   * the reference without mutating the array.
+   *
+   * Exception-safe: if aggregation throws, previously built state is preserved.
+   */
+  def build(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
+    // rows.length is Int by design; check guards against future widening.
+    val n = rows.length
+    if (n < 0) {
+      throw SparkException.internalError(
+        s"WindowSegmentTree cannot hold more than Int.MaxValue rows, got $n")
+    }
+    val nBlocks = if (n == 0) 0 else (n + blockSize - 1) / blockSize
+    val newBlockAggs = computeBlockAggregates(rows, n, nBlocks)
+
+    // Commit.
+    rowArray = rows
+    numRows = n
+    numBlocks = nBlocks
+    blockAggregates = newBlockAggs
+    // Rebuild invalidates cached block levels; release accounting first (I4).
+    releaseAllCachedBlocks()
+  }
+
+  /**
+   * Query [lo, hi) and directly evaluate the result via `processor.evaluate`
+   * into `target`. Uses an internal pre-allocated buffer so no per-call
+   * allocation is needed.
+   */
+  private[window] def queryInto(
+      lo: Int, hi: Int, processor: AggregateProcessor, target: InternalRow): Unit = {
+    query(lo, hi, internalQueryBuffer)
+    processor.evaluate(internalQueryBuffer, target)
+  }
+
+  private[this] val internalQueryBuffer: InternalRow = newBuffer()
+
+  def query(lo: Int, hi: Int, outBuffer: InternalRow): Unit = {
+    if (lo < 0 || hi > numRows || lo > hi) {
+      throw SparkException.internalError(
+        s"Invalid range [lo=$lo, hi=$hi) for size=$numRows")
+    }
+    // Reset outBuffer to identity only after bounds validation.
+    initProj.target(outBuffer)(InternalRow.empty)
+    if (lo == hi) return
+
+    val blo = lo / blockSize
+    val bhi = (hi - 1) / blockSize
+
+    if (blo == bhi) {
+      val blockStart = blo * blockSize
+      mergeBlockRange(blo, lo - blockStart, hi - blockStart, outBuffer)
+    } else {
+      // left partial
+      val loStart = blo * blockSize
+      val loBlockRows = math.min(blockSize, numRows - loStart)
+      mergeBlockRange(blo, lo - loStart, loBlockRows, outBuffer)
+      // full blocks
+      var b = blo + 1
+      while (b < bhi) {
+        mergeInto(outBuffer, blockAggregates(b))
+        b += 1
+      }
+      // right partial
+      val hiStart = bhi * blockSize
+      mergeBlockRange(bhi, 0, hi - hiStart, outBuffer)
+    }
+  }
+
+  /** Terminal: releases all state. Idempotent (I4). */
+  override def close(): Unit = {
+    if (closed) return
+    // Free all cached-block accounting before dropping references.
+    releaseAllCachedBlocks()
+    closeRowArray()
+    blockAggregates = Array.empty
+    numRows = 0
+    numBlocks = 0
+    closed = true
+  }
+
+  // ---------- Test hooks (package-private) ----------
+
+  private[window] def peekBlockCount: Int = numBlocks
+
+  private[window] def testOnlySpiller(): MemoryConsumer = spiller
+
+  /** Test-only accessor for the per-block memory accounting value. */
+  private[window] def peekBlockBytes: Long = blockBytes
+
+  /** NOTE: test-only; promotes block to MRU in the LRU cache as a side effect. */
+  private[window] def peekLevelSize(blockIdx: Int, level: Int): Int = {
+    val levels = ensureBlockLevels(blockIdx)
+    levels(level).length
+  }
+
+  /** NOTE: test-only; promotes block to MRU in the LRU cache as a side effect. */
+  private[window] def peekLevelCount(blockIdx: Int): Int = {
+    val levels = ensureBlockLevels(blockIdx)
+    levels.length
+  }
+
+  // ---------- Internals ----------
+
+  private def computeBlockAggregates(
+      array: ExternalAppendOnlyUnsafeRowArray,
+      n: Int,
+      nBlocks: Int): Array[InternalRow] = {
+    if (n == 0) return Array.empty
+    val result = new Array[InternalRow](nBlocks)
+    val iter = array.generateIterator()
+    var b = 0
+    while (b < nBlocks) {
+      val buf = newBuffer()
+      initProj.target(buf)(InternalRow.empty)
+      val start = b * blockSize
+      val end = math.min(start + blockSize, n)
+      var i = start
+      while (i < end) {
+        if (!iter.hasNext) {
+          throw SparkException.internalError("rowArray iterator exhausted unexpectedly")
+        }
+        val row = iter.next()
+        updateProj.target(buf)(joinedRow(buf, row))
+        i += 1
+      }
+      result(b) = buf
+      b += 1
+    }
+    result
+  }
+
+  /** Merge `src` buffer into `dst` buffer using mergeProj. */
+  private def mergeInto(dst: InternalRow, src: InternalRow): Unit = {
+    mergeProj.target(dst)(joinedRow(dst, src))
+  }
+
+  private def newBuffer(): InternalRow =
+    new SpecificInternalRow(bufferDataTypes)
+
+  /** Merge the given leaf range [lo, hi) inside `blockIdx` into `out`. */
+  private def mergeBlockRange(
+      blockIdx: Int, lo: Int, hi: Int, out: InternalRow): Unit = {
+    if (lo >= hi) return
+    val levels = ensureBlockLevels(blockIdx)
+    val blockRows = levels(0).length
+    val topLevel = levels.length - 1
+    queryDescend(levels, blockRows, topLevel, 0, lo, hi, out)
+  }
+
+  /**
+   * Descend the (per-block) segment tree merging any node fully contained
+   *  in [queryLo, queryHi) into `out`. A node at (level L, index idx) covers
+   *  leaves `[idx * span, min((idx+1)*span, blockRows))` where span = F^L.
+   */
+  private def queryDescend(
+      levels: Array[Array[InternalRow]],
+      blockRows: Int,
+      level: Int,
+      idx: Int,
+      queryLo: Int,
+      queryHi: Int,
+      out: InternalRow): Unit = {
+    val span = spans(level)
+    val nodeLo = idx * span
+    val nodeHi = math.min(nodeLo + span, blockRows)
+    if (queryLo >= nodeHi || queryHi <= nodeLo) return
+    if (queryLo <= nodeLo && nodeHi <= queryHi) {
+      mergeInto(out, levels(level)(idx))
+      return
+    }
+    val childLevel = level - 1
+    val childLevelSize = levels(childLevel).length
+    var c = 0
+    while (c < fanout) {
+      val childIdx = idx * fanout + c
+      if (childIdx < childLevelSize) {
+        queryDescend(levels, blockRows, childLevel, childIdx, queryLo, queryHi, out)
+      }
+      c += 1
+    }
+  }
+
+  /**
+   * Build (or fetch from LRU) the full per-block levels array.
+   *  Protocol: acquire memory -> build -> cache. Eviction on capacity
+   *  overflow or on TMM spill request.
+   */
+  private def ensureBlockLevels(blockIdx: Int): Array[Array[InternalRow]] = {
+    val cached = blockLevelsCache.get(Integer.valueOf(blockIdx))
+    if (cached != null) return cached
+
+    // Enforce LRU capacity before building a new entry (I3).
+    val cap = maxCachedBlocks.getOrElse(Int.MaxValue)
+    while (blockLevelsCache.size() >= cap) {
+      if (!evictEldest()) return throwCacheEvictFailed(blockIdx)
+    }
+
+    // Acquire accounting; on partial grant, try one manual evict-and-retry.
+    if (!acquireBlockMemory()) {
+      if (!evictEldest() || !acquireBlockMemory()) {
+        // scalastyle:off throwerror
+        throw QueryExecutionErrors.cannotAcquireMemoryForWindowAggregateError(
+          blockBytes, 0L)
+        // scalastyle:on throwerror
+      }
+    }
+
+    // If buildBlockLevels throws, release the just-acquired memory (I4).
+    val levels =
+      try buildBlockLevels(blockIdx)
+      catch { case t: Throwable => releaseBlockMemory(); throw t }
+    blockLevelsCache.put(Integer.valueOf(blockIdx), levels)
+    levels
+  }
+
+  private def buildBlockLevels(blockIdx: Int): Array[Array[InternalRow]] = {
+    val blockStart = blockIdx * blockSize
+    val blockRows = math.min(blockSize, numRows - blockStart)
+
+    // Level 0: one aggregate per row in the block.
+    val leaves = new Array[InternalRow](blockRows)
+    val iter = rowArray.generateIterator(blockStart)
+    var i = 0
+    while (i < blockRows) {
+      if (!iter.hasNext) {
+        throw SparkException.internalError(
+          s"rowArray iterator exhausted at block $blockIdx row $i")
+      }
+      val row = iter.next()
+      val buf = newBuffer()
+      initProj.target(buf)(InternalRow.empty)
+      updateProj.target(buf)(joinedRow(buf, row))
+      leaves(i) = buf
+      i += 1
+    }
+
+    val allLevels = mutable.ArrayBuffer[Array[InternalRow]](leaves)
+    var prev = leaves
+    while (prev.length > 1) {
+      val parentCount = (prev.length + fanout - 1) / fanout
+      val parents = new Array[InternalRow](parentCount)
+      var p = 0
+      while (p < parentCount) {
+        val buf = newBuffer()
+        initProj.target(buf)(InternalRow.empty)
+        val childStart = p * fanout
+        val childEnd = math.min(childStart + fanout, prev.length)
+        var c = childStart
+        while (c < childEnd) {
+          mergeInto(buf, prev(c))
+          c += 1
+        }
+        parents(p) = buf
+        p += 1
+      }
+      allLevels += parents
+      prev = parents
+    }
+    allLevels.toArray
+  }
+
+  private def throwCacheEvictFailed(blockIdx: Int): Nothing = {
+    throw SparkException.internalError(
+      s"LRU cache eviction failed for block $blockIdx (size=${blockLevelsCache.size})")
+  }
+
+  // ---------- Memory accounting helpers ----------
+
+  /**
+   * Try to acquire `blockBytes` for one cached block. Returns true on full
+   * grant, false on partial (after rolling the partial grant back). Must
+   * not be called from within [[SegTreeSpiller.spill]] (I1).
+   */
+  private def acquireBlockMemory(): Boolean = {
+    val granted = spiller.acquireMemory(blockBytes)
+    if (granted < blockBytes) {
+      if (granted > 0) spiller.freeMemory(granted)
+      false
+    } else {
+      true
+    }
+  }
+
+  /**
+   * Release the accounting for one block. Caller ensures pairing with a
+   *  prior successful [[acquireBlockMemory]] (I4).
+   */
+  private def releaseBlockMemory(): Unit = {
+    spiller.freeMemory(blockBytes)
+  }
+
+  /**
+   * Evict LRU blocks until `target` bytes have been freed (or cache is
+   *  empty). Returns freed bytes. Called from [[SegTreeSpiller.spill]].
+   */
+  private def evictUntil(target: Long): Long = {
+    var freed = 0L
+    while (freed < target && !blockLevelsCache.isEmpty) {
+      freed += evictEldestReturnBytes()
+    }
+    freed
+  }
+
+  /** Evict one LRU block. Returns true if a block was evicted. */
+  private def evictEldest(): Boolean = {
+    if (blockLevelsCache.isEmpty) return false
+    evictEldestReturnBytes()
+    true
+  }
+
+  private def evictEldestReturnBytes(): Long = {
+    val it = blockLevelsCache.entrySet().iterator()
+    if (!it.hasNext) return 0L
+    val head = it.next()
+    it.remove()
+    releaseBlockMemory()
+    blockBytes
+  }
+
+  /** Release accounting for all cached blocks and clear the cache. */
+  private def releaseAllCachedBlocks(): Unit = {
+    val n = blockLevelsCache.size()
+    if (n > 0) {
+      blockLevelsCache.clear()
+      spiller.freeMemory(n.toLong * blockBytes)
+    }
+  }
+
+  private def closeRowArray(): Unit = {
+    // rowArray is caller-owned (see `build` docstring); drop the reference only.
+    rowArray = null
+  }
+}
+
+private[window] object WindowSegmentTree {
+  val DefaultFanout: Int = 16
+  val DefaultBlockSize: Int = 65536
+
+  /**
+   * Explicit allowlist of [[DeclarativeAggregate]] subclasses safe for
+   * segment-tree execution. Safe iff combine semantics form a commutative
+   * monoid on the partial-buffer representation (associativity +
+   * compatibility with `mergeExpressions`):
+   *
+   *   - [[Min]], [[Max]]: idempotent semilattice.
+   *   - [[Sum]], [[Count]]: additive monoid.
+   *   - [[Average]]: sum + count, both additive monoids.
+   *   - [[StddevPop]], [[StddevSamp]], [[VariancePop]], [[VarianceSamp]]:
+   *     Welford (count, mean, M2) is associative -- see
+   *     CentralMomentAgg.mergeExpressions.
+   *
+   * Intentionally excluded (tracked as follow-up): HyperLogLogPlusPlus /
+   * ApproxCountDistinct (sketch-buffer interaction unaudited), First / Last
+   * (order-dependent), CollectList / CollectSet (unbounded buffer growth),
+   * Percentile / ApproxPercentile (sorted-sketch buffer), and any
+   * ImperativeAggregate (excluded by the type check).
+   *
+   * Callers should use [[isEligible]] rather than `contains` directly.
+   */
+  val EligibleAggregates: Set[Class[_ <: DeclarativeAggregate]] = Set(
+    classOf[Min],
+    classOf[Max],
+    classOf[Sum],
+    classOf[Count],
+    classOf[Average],
+    classOf[StddevPop],
+    classOf[StddevSamp],
+    classOf[VariancePop],
+    classOf[VarianceSamp]
+  )
+
+  /**
+   * Returns true iff `f` is a [[DeclarativeAggregate]] on the explicit segment-tree
+   * allowlist. See [[EligibleAggregates]] for the rationale and excluded aggregates.
+   */
+  def isEligible(f: Expression): Boolean = f match {
+    case agg: DeclarativeAggregate => EligibleAggregates.contains(agg.getClass)
+    case _ => false
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
index 9bd68cbe72a07..abfdf3e767874 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -29,13 +29,13 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.classic.{SparkSession, Strategy, StreamingCheckpointManager, StreamingQueryManager, UDFRegistration}
-import org.apache.spark.sql.connector.catalog.CatalogManager
+import org.apache.spark.sql.connector.catalog.DefaultCatalogManager
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.{ColumnarRule, CommandExecutionMode, QueryExecution, SparkOptimizer, SparkPlanner, SparkSqlParser}
 import org.apache.spark.sql.execution.adaptive.AdaptiveRulesHolder
 import org.apache.spark.sql.execution.aggregate.{ResolveEncodersInScalaAgg, ScalaUDAF}
 import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin
-import org.apache.spark.sql.execution.command.CommandCheck
+import org.apache.spark.sql.execution.command.{CheckViewReferences, CommandCheck}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.v2.{TableCapabilityCheck, V2SessionCatalog}
 import org.apache.spark.sql.execution.streaming.runtime.ResolveWriteToStream
@@ -161,7 +161,7 @@ abstract class BaseSessionStateBuilder(
   protected lazy val v2SessionCatalog = new V2SessionCatalog(catalog)
 
   protected lazy val catalogManager = {
-    val cm = new CatalogManager(v2SessionCatalog, catalog)
+    val cm = new DefaultCatalogManager(v2SessionCatalog, catalog)
     parentState.foreach(ps => cm.copySessionPathFrom(ps.catalogManager))
     cm
   }
@@ -189,7 +189,7 @@ abstract class BaseSessionStateBuilder(
    *
    * Note: this depends on the `conf` and `catalog` fields.
    */
-  protected def analyzer: Analyzer = new Analyzer(catalogManager, sharedRelationCache) {
+  protected def analyzer: Analyzer = new Analyzer(catalogManager, sharedRelationCache, Some(conf)) {
     override val hintResolutionRules: Seq[Rule[LogicalPlan]] =
       customHintResolutionRules
 
@@ -259,6 +259,7 @@ abstract class BaseSessionStateBuilder(
         HiveOnlyCheck +:
         TableCapabilityCheck +:
         CommandCheck +:
+        CheckViewReferences +:
         ViewSyncSchemaToMetaStore +:
         customCheckRules
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala
index 9124c1b889098..a56aa90d6d72b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala
@@ -18,9 +18,11 @@
 package org.apache.spark.sql.jdbc
 
 import java.sql.{Connection, SQLException}
+import java.util.Locale
 
 import scala.collection.mutable.ArrayBuilder
 
+import org.apache.spark.sql.catalyst.plans.logical.SampleMethod
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
 import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
 import org.apache.spark.sql.types._
@@ -71,10 +73,10 @@ private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCErro
 
   override def supportsOffset: Boolean = true
 
-  override def supportsTableSample: Boolean = true
-
-  override def getTableSample(sample: TableSampleInfo): String = {
-    s"TABLESAMPLE (${(sample.upperBound - sample.lowerBound) * 100}) REPEATABLE (${sample.seed})"
+  override def compileTableSample(sample: TableSampleInfo): Option[String] = {
+    if (sample.withReplacement || sample.sampleMethod == SampleMethod.System) return None
+    Some(s"TABLESAMPLE ${sample.sampleMethod.toString.toUpperCase(Locale.ROOT)}" +
+      s" (${(sample.upperBound - sample.lowerBound) * 100}) REPEATABLE (${sample.seed})")
   }
 
   override def supportsHint: Boolean = true
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 1ddf22834fbea..a34d23512e996 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -31,6 +31,7 @@ import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.plans.logical.SampleMethod
 import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{localDateTimeToMicros, toJavaTimestampNoRebase}
 import org.apache.spark.sql.catalyst.util.IntervalUtils.{fromDayTimeString, fromYearMonthString, getDuration}
@@ -40,7 +41,7 @@ import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
 import org.apache.spark.sql.connector.catalog.index.TableIndex
 import org.apache.spark.sql.connector.expressions.{Expression, Literal, NamedReference}
 import org.apache.spark.sql.connector.expressions.aggregate.AggregateFunc
-import org.apache.spark.sql.connector.expressions.filter.Predicate
+import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, Predicate}
 import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions, JdbcOptionsInWrite, JdbcUtils}
@@ -399,6 +400,17 @@ abstract class JdbcDialect extends Serializable with Logging {
   }
 
   private[jdbc] class JDBCSQLBuilder extends V2ExpressionSQLBuilder {
+    // SPARK-53454: Produce portable SQL for AlwaysTrue/AlwaysFalse predicates.
+    // Some databases (Oracle, DB2) do not support bare TRUE/FALSE in WHERE clauses.
+    // The result is parenthesized so it stays valid when nested as an operand of a
+    // larger expression (e.g. "a" = (1 = 1) or (1 = 1) IS NOT NULL), not just as a
+    // standalone WHERE predicate.
+    override def build(expr: Expression): String = expr match {
+      case _: AlwaysTrue => "(1 = 1)"
+      case _: AlwaysFalse => "(1 = 0)"
+      case _ => super.build(expr)
+    }
+
     // Some dialects do not support boolean type and this convenient util function is
     // provided to generate SQL string without boolean values.
     protected def inputToSQLNoBool(input: Expression): String = input match {
@@ -868,11 +880,32 @@ abstract class JdbcDialect extends Serializable with Logging {
    */
   def supportsOffset: Boolean = false
 
+  @deprecated("Use compileTableSample instead", "4.2.0")
   def supportsTableSample: Boolean = false
 
+  @deprecated("Use compileTableSample instead", "4.2.0")
   def getTableSample(sample: TableSampleInfo): String =
     throw new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_3183")
 
+  /**
+   * Compile a [[org.apache.spark.sql.execution.datasources.v2.TableSampleInfo]] into a
+   * SQL `TABLESAMPLE` clause, or return [[scala.None]] if the dialect cannot represent
+   * the requested sampling semantics (e.g. sampling with replacement).
+   *
+   * The default implementation delegates to [[getTableSample]] when [[supportsTableSample]]
+   * is true and the requested sample is BERNOULLI without replacement (the contract
+   * predating this method), and returns [[scala.None]] otherwise.
+   */
+  @Since("4.2.0")
+  def compileTableSample(sample: TableSampleInfo): Option[String] = {
+    if (supportsTableSample && !sample.withReplacement &&
+        sample.sampleMethod == SampleMethod.Bernoulli) {
+      Some(getTableSample(sample))
+    } else {
+      None
+    }
+  }
+
   def supportsHint: Boolean = false
 
   /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcSQLQueryBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcSQLQueryBuilder.scala
index 93af5890711c6..4dd6631699cb7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcSQLQueryBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcSQLQueryBuilder.scala
@@ -172,12 +172,22 @@ class JdbcSQLQueryBuilder(dialect: JdbcDialect, options: JDBCOptions) {
   /**
    * Constructs the table sample clause that following dialect's SQL syntax.
    */
+  @deprecated("Use withTableSampleClause(String) instead", "4.2.0")
   def withTableSample(sample: TableSampleInfo): JdbcSQLQueryBuilder = {
     tableSampleClause = dialect.getTableSample(sample)
 
     this
   }
 
+  /**
+   * Sets a pre-compiled table sample clause directly.
+   */
+  def withTableSampleClause(clause: String): JdbcSQLQueryBuilder = {
+    tableSampleClause = clause
+
+    this
+  }
+
   /**
    * Represents JOIN subquery in case Join has been pushed down. This value should be used
    * instead of options.tableOrQuery if join has been pushed down.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala
index 41452ca45057c..a047085a35378 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala
@@ -99,19 +99,12 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with No
       }
     }
 
-    override def visitStartsWith(l: String, r: String): String = {
-      val value = r.substring(1, r.length() - 1)
-      s"$l LIKE '${escapeSpecialCharsForLikePattern(value)}%' ESCAPE '\\\\'"
-    }
-
-    override def visitEndsWith(l: String, r: String): String = {
-      val value = r.substring(1, r.length() - 1)
-      s"$l LIKE '%${escapeSpecialCharsForLikePattern(value)}' ESCAPE '\\\\'"
-    }
-
-    override def visitContains(l: String, r: String): String = {
-      val value = r.substring(1, r.length() - 1)
-      s"$l LIKE '%${escapeSpecialCharsForLikePattern(value)}%' ESCAPE '\\\\'"
+    // MySQL treats backslash as an escape character inside string literals, so every backslash in
+    // a LIKE pattern (and the ESCAPE character) must be doubled to survive string-literal parsing
+    // before the LIKE engine applies its own escaping. The base STARTS_WITH/ENDS_WITH/CONTAINS
+    // pattern building is otherwise shared, so only this hook is overridden.
+    override def escapeStringLiteralForLikePattern(str: String): String = {
+      str.replace("\\", "\\\\")
     }
 
     override def visitAggregateFunction(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala
index 606d8f69760de..dd57c129179ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala
@@ -376,15 +376,12 @@ private case class PostgresDialect()
 
   override def supportsOffset: Boolean = true
 
-  override def supportsTableSample: Boolean = true
-
   override def supportsJoin: Boolean = true
 
-  override def getTableSample(sample: TableSampleInfo): String = {
-    // hard-coded to BERNOULLI for now because Spark doesn't have a way to specify sample
-    // method name
-    "TABLESAMPLE BERNOULLI" +
-      s" (${(sample.upperBound - sample.lowerBound) * 100}) REPEATABLE (${sample.seed})"
+  override def compileTableSample(sample: TableSampleInfo): Option[String] = {
+    if (sample.withReplacement) return None
+    Some(s"TABLESAMPLE ${sample.sampleMethod.toString.toUpperCase(Locale.ROOT)}" +
+      s" (${(sample.upperBound - sample.lowerBound) * 100}) REPEATABLE (${sample.seed})")
   }
 
   override def renameTable(oldTable: Identifier, newTable: Identifier): String = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala
index 0ad1974b7d769..c8a893f374f39 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingLocalVariableManager.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{FakeLocalCatalog, ResolvedIdentifier}
 import org.apache.spark.sql.catalyst.catalog.{VariableDefinition, VariableManager}
+import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.errors.DataTypeErrorsBase
 import org.apache.spark.sql.errors.QueryCompilationErrors.unresolvedVariableError
@@ -47,13 +48,18 @@ class SqlScriptingLocalVariableManager(context: SqlScriptingExecutionContext)
     context.currentScope.variables.put(name, varDef)
   }
 
-  override def set(nameParts: Seq[String], varDef: VariableDefinition): Unit = {
+  override def set(
+      nameParts: Seq[String],
+      varDef: VariableDefinition,
+      origin: Origin): Unit = {
     val scope = findScopeOfVariable(nameParts)
       .getOrElse(
-        throw unresolvedVariableError(nameParts, varDef.identifier.namespace().toIndexedSeq))
+        throw unresolvedVariableError(
+          nameParts, Seq(varDef.identifier.namespace().toIndexedSeq), origin))
 
     if (!scope.variables.contains(nameParts.last)) {
-      throw unresolvedVariableError(nameParts, varDef.identifier.namespace().toIndexedSeq)
+      throw unresolvedVariableError(
+        nameParts, Seq(varDef.identifier.namespace().toIndexedSeq), origin)
     }
 
     scope.variables.put(nameParts.last, varDef)
diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala
index 91d3f9a484e24..59a1264993e19 100644
--- a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala
@@ -19,12 +19,14 @@ package org.apache.spark.status.api.v1.sql
 
 import java.util.{Date, HashMap}
 
+import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Success, Try}
 
 import jakarta.ws.rs._
 import jakarta.ws.rs.core.{Context, MediaType, UriInfo}
 
 import org.apache.spark.JobExecutionStatus
+import org.apache.spark.internal.config.UI.UI_SQL_GROUP_SUB_EXECUTION_ENABLED
 import org.apache.spark.sql.execution.ui.{SparkPlanGraph, SparkPlanGraphCluster, SparkPlanGraphNode, SQLAppStatusStore, SQLExecutionUIData}
 import org.apache.spark.status.api.v1.{BaseAppResource, NotFoundException}
 import org.apache.spark.ui.UIUtils
@@ -74,6 +76,11 @@ private[v1] class SqlResource extends BaseAppResource {
    * Server-side DataTables endpoint for SQL executions listing.
    * Accepts DataTables server-side parameters (start, length, order, search)
    * and returns paginated results with recordsTotal/recordsFiltered counts.
+   *
+   * When `groupSubExecution=true` (default = `spark.ui.groupSQLSubExecutionEnabled`),
+   * pagination is over root executions only and each row carries its sub-executions
+   * inline as `subExecutions: [...]`. Sub-executions whose root is missing from the
+   * filtered set (orphans) are surfaced as roots so they don't disappear.
    */
   @GET
   @Path("sqlTable")
@@ -85,7 +92,11 @@ private[v1] class SqlResource extends BaseAppResource {
       // Echo draw counter to prevent stale responses
       val draw = Option(uriParams.getFirst("draw")).map(_.toInt).getOrElse(0)
 
-      val totalRecords = sqlStore.executionsCount()
+      // Sub-execution grouping flag; default to the cluster config. Defensive
+      // parse - bad values should not 500 the public REST endpoint.
+      val groupSubExec = Option(uriParams.getFirst("groupSubExecution"))
+        .flatMap(v => Try(v.toBoolean).toOption)
+        .getOrElse(ui.conf.get(UI_SQL_GROUP_SUB_EXECUTION_ENABLED))
 
       // Search and status filter
       val searchValue = Option(uriParams.getFirst("search[value]"))
@@ -94,9 +105,14 @@ private[v1] class SqlResource extends BaseAppResource {
         .filter(_.nonEmpty)
       val needsFilter = searchValue.isDefined || statusFilter.isDefined
 
+      // Always load all execs once. We need the full set to (a) identify orphan
+      // sub-executions whose root is filtered out and (b) count root rows for
+      // `recordsTotal`. `sqlStore.executionsList()` is already a full
+      // materialization, so there is no separate "KVStore-pagination" path being
+      // disabled here.
+      val allExecs = sqlStore.executionsList()
+
       val filteredExecs = if (needsFilter) {
-        // When filtering, we must load all and filter in memory
-        val allExecs = sqlStore.executionsList()
         allExecs.filter { exec =>
           val matchesSearch = searchValue.forall { search =>
             val lower = search.toLowerCase(java.util.Locale.ROOT)
@@ -110,10 +126,14 @@ private[v1] class SqlResource extends BaseAppResource {
           matchesSearch && matchesStatus
         }
       } else {
-        // No filter — will use KVStore pagination below
-        Seq.empty
+        allExecs
+      }
+
+      val (rootRows, subsByRoot) = if (groupSubExec) {
+        SqlResource.partitionRoots(filteredExecs)
+      } else {
+        (filteredExecs, Map.empty[Long, Seq[SQLExecutionUIData]])
       }
-      val filteredRecords = if (needsFilter) filteredExecs.size else totalRecords
 
       // Sort
       val sortCol = Option(uriParams.getFirst("order[0][column]"))
@@ -125,26 +145,43 @@ private[v1] class SqlResource extends BaseAppResource {
       val start = Option(uriParams.getFirst("start")).map(_.toInt).getOrElse(0)
       val length = Option(uriParams.getFirst("length")).map(_.toInt).getOrElse(20)
 
-      val page = if (needsFilter) {
-        // Filter/search: sort and paginate in memory
-        val sorted = sortExecs(filteredExecs, sortCol, sortDir)
-        if (length > 0) sorted.slice(start, start + length) else sorted
-      } else {
-        // No filter: use KVStore-level pagination for efficiency
-        // KVStore returns in insertion order; sort in memory for the page
-        val execs = sqlStore.executionsList()
-        val sorted = sortExecs(execs, sortCol, sortDir)
-        if (length > 0) sorted.slice(start, start + length) else sorted
+      val sortedRoots = sortExecs(rootRows, sortCol, sortDir)
+      val page = if (length > 0) sortedRoots.slice(start, start + length) else sortedRoots
+
+      // Convert to Java-compatible row data; embed sub-executions when grouping.
+      // Always emit a `subExecutions` field (possibly empty) in grouped mode so
+      // JSON consumers see a consistent schema; flat mode never includes it.
+      val aaData = page.map { exec =>
+        val row = execToRow(exec)
+        if (groupSubExec) {
+          val subs = subsByRoot.getOrElse(exec.executionId, Seq.empty)
+          // Sort subs by id ascending so they appear in chronological order
+          row.put("subExecutions", sortExecs(subs, "id", "asc").map(execToRow).asJava)
+        }
+        row
       }
 
-      // Convert to Java-compatible row data
-      val aaData = page.map(execToRow)
+      // Counts: grouped totals reflect root-only counts so DataTables shows
+      // "Showing X to Y of Z entries" matching the rows the user actually sees.
+      // Flat mode's recordsTotal is the unfiltered total (from the KVStore),
+      // which lets DataTables show the "filtered from W total entries" suffix.
+      val recordsTotal = if (groupSubExec) {
+        if (needsFilter) {
+          // Re-derive root rows from the unfiltered set using the same predicate
+          SqlResource.partitionRoots(allExecs)._1.size
+        } else {
+          rootRows.size
+        }
+      } else {
+        sqlStore.executionsCount()
+      }
+      val recordsFiltered = if (groupSubExec) rootRows.size else filteredExecs.size
 
       val ret = new HashMap[String, Object]()
       ret.put("draw", Integer.valueOf(draw))
       ret.put("aaData", aaData)
-      ret.put("recordsTotal", java.lang.Long.valueOf(filteredRecords))
-      ret.put("recordsFiltered", java.lang.Long.valueOf(filteredRecords))
+      ret.put("recordsTotal", java.lang.Long.valueOf(recordsTotal))
+      ret.put("recordsFiltered", java.lang.Long.valueOf(recordsFiltered))
       ret
     }
   }
@@ -275,3 +312,22 @@ private[v1] class SqlResource extends BaseAppResource {
   }
 
 }
+
+private[v1] object SqlResource {
+
+  /**
+   * Split a set of executions into root rows and a sub-execution map. A root row is
+   * either an execution whose id equals its rootExecutionId, or an orphan sub whose
+   * root parent is absent from the input set. Called on the filtered set (for paging)
+   * and on the full set (for `recordsTotal`), so the predicate lives in one place
+   * rather than being inlined twice.
+   */
+  def partitionRoots(execs: Seq[SQLExecutionUIData])
+      : (Seq[SQLExecutionUIData], Map[Long, Seq[SQLExecutionUIData]]) = {
+    val ids = execs.iterator.map(_.executionId).toSet
+    val (roots, subs) = execs.partition { e =>
+      e.executionId == e.rootExecutionId || !ids.contains(e.rootExecutionId)
+    }
+    (roots, subs.groupBy(_.rootExecutionId))
+  }
+}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 14f36cbae055b..c56db6c9b7e3f 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -363,6 +363,7 @@
 | org.apache.spark.sql.catalyst.expressions.ThetaIntersection | theta_intersection | SELECT theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct<theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12))):bigint> |
 | org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct<theta_sketch_estimate(theta_sketch_agg(col, 12)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct<theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12)):bigint> |
+| org.apache.spark.sql.catalyst.expressions.TimeBucketExpressionBuilder | time_bucket | SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00') | struct<time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp> |
 | org.apache.spark.sql.catalyst.expressions.TimeDiff | time_diff | SELECT time_diff('HOUR', TIME'20:30:29', TIME'21:30:28') | struct<time_diff(HOUR, TIME '20:30:29', TIME '21:30:28'):bigint> |
 | org.apache.spark.sql.catalyst.expressions.TimeFromMicros | time_from_micros | SELECT time_from_micros(0) | struct<time_from_micros(0):time(6)> |
 | org.apache.spark.sql.catalyst.expressions.TimeFromMillis | time_from_millis | SELECT time_from_millis(0) | struct<time_from_millis(0):time(6)> |
@@ -536,11 +537,12 @@
 | org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<var_pop(col):double> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<var_samp(col):double> |
 | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<variance(col):double> |
-| org.apache.spark.sql.catalyst.expressions.st.ST_AsBinary | st_asbinary | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))):string> |
-| org.apache.spark.sql.catalyst.expressions.st.ST_GeogFromWKB | st_geogfromwkb | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))):string> |
-| org.apache.spark.sql.catalyst.expressions.st.ST_GeomFromWKB | st_geomfromwkb | SELECT hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0))):string> |
+| org.apache.spark.sql.catalyst.expressions.st.ST_AsBinary | st_asbinary | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), NDR)):string> |
+| org.apache.spark.sql.catalyst.expressions.st.ST_GeogFromWKB | st_geogfromwkb | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), NDR)):string> |
+| org.apache.spark.sql.catalyst.expressions.st.ST_GeomFromWKB | st_geomfromwkb | SELECT hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR)):string> |
 | org.apache.spark.sql.catalyst.expressions.st.ST_SetSrid | st_setsrid | SELECT st_srid(st_setsrid(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 4326)) | struct<st_srid(st_setsrid(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), 4326)):int> |
 | org.apache.spark.sql.catalyst.expressions.st.ST_Srid | st_srid | SELECT st_srid(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')) | struct<st_srid(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')):int> |
+| org.apache.spark.sql.catalyst.expressions.variant.IsValidVariant | is_valid_variant | SELECT is_valid_variant(parse_json('null')) | struct<is_valid_variant(parse_json(null)):boolean> |
 | org.apache.spark.sql.catalyst.expressions.variant.IsVariantNull | is_variant_null | SELECT is_variant_null(parse_json('null')) | struct<is_variant_null(parse_json(null)):boolean> |
 | org.apache.spark.sql.catalyst.expressions.variant.ParseJsonExpressionBuilder | parse_json | SELECT parse_json('{"a":1,"b":0.8}') | struct<parse_json({"a":1,"b":0.8}):variant> |
 | org.apache.spark.sql.catalyst.expressions.variant.SchemaOfVariant | schema_of_variant | SELECT schema_of_variant(parse_json('null')) | struct<schema_of_variant(parse_json(null)):string> |
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out
index db5d61ebcc445..8915835fffe88 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out
@@ -97,7 +97,7 @@ org.apache.spark.sql.AnalysisException
 desc formatted char_view
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`char_view`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, `spark_catalog`.`default`.`char_view`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, spark_catalog.default.char_view
 
 
 -- !query
@@ -189,7 +189,7 @@ AlterViewAsCommand `spark_catalog`.`default`.`char_view`, select * from char_tbl
 desc formatted char_view
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`char_view`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, `spark_catalog`.`default`.`char_view`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, spark_catalog.default.char_view
 
 
 -- !query
@@ -215,7 +215,7 @@ AlterTableSetPropertiesCommand `spark_catalog`.`default`.`char_view`, [yes=no],
 desc formatted char_view
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`char_view`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, `spark_catalog`.`default`.`char_view`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, spark_catalog.default.char_view
 
 
 -- !query
@@ -241,7 +241,7 @@ AlterTableUnsetPropertiesCommand `spark_catalog`.`default`.`char_view`, [yes], f
 desc formatted char_view
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`char_view`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, `spark_catalog`.`default`.`char_view`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.char_view, spark_catalog.default.char_view
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
index f284b4cabdb45..3519577e2bef4 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
@@ -313,28 +313,28 @@ org.apache.spark.sql.AnalysisException
 DESC v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
 DESC TABLE v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
 DESC FORMATTED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
 DESC EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out
index 4b9bb859cd567..3f16d4f756511 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out
@@ -174,7 +174,7 @@ EXPLAIN FORMATTED
   CREATE VIEW explain_view AS
     SELECT key, val FROM explain_temp1
 -- !query analysis
-ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode
+ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, false, FormattedMode
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out
index 4b9bb859cd567..3f16d4f756511 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out
@@ -174,7 +174,7 @@ EXPLAIN FORMATTED
   CREATE VIEW explain_view AS
     SELECT key, val FROM explain_temp1
 -- !query analysis
-ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode
+ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, false, FormattedMode
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out
new file mode 100644
index 0000000000000..3def19963fc08
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-nulltype-single-pass.sql.out
@@ -0,0 +1,8 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS a#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out
index 9f34e1a6e4ea8..789f5bcb23629 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/extract-value-resolution-edge-cases.sql.out
@@ -38,3 +38,30 @@ DROP TABLE t1
 -- !query analysis
 DropTable false, false
 +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1
+
+
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS a#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT col[0] FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS col[0]#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT col['key'] FROM (SELECT null AS col) t
+-- !query analysis
+Project [null AS col[key]#x]
++- SubqueryAlias t
+   +- Project [null AS col#x]
+      +- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out
index cfcd9c5c42e28..11f5f948d8f69 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/having-and-order-by-recursive-type-name-resolution.sql.out
@@ -500,3 +500,12 @@ Project [sum_val#x]
       +- Aggregate [col1#x], [(col1#x.nums[0] + col1#x.nums[1]) AS sum_val#x, col1#x]
          +- SubqueryAlias t
             +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1
+-- !query analysis
+Filter (cast(null as int) = 1)
++- Aggregate [col1#x], [named_struct(a, 1) AS col1#x]
+   +- SubqueryAlias t
+      +- LocalRelation [col1#x]
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
index 4666e62b2d690..cb46c265512ff 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
@@ -3064,6 +3064,136 @@ Project [1 AS 1#x]
                +- LocalRelation [col1#x, col2#x]
 
 
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+     nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k)
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias nj1
+:     +- Project [col1#x AS k#x, col2#x AS v1#x]
+:        +- LocalRelation [col1#x, col2#x]
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias nj2
+:     +- Project [col1#x AS k#x, col2#x AS v2#x]
+:        +- LocalRelation [col1#x, col2#x]
++- Project [k#x, v1#x, v2#x, unq_k#x]
+   +- LateralJoin lateral-subquery#x [k#x], Inner
+      :  +- SubqueryAlias __auto_generated_subquery_name
+      :     +- Project [outer(k#x) AS unq_k#x]
+      :        +- OneRowRelation
+      +- Project [k#x, v1#x, v2#x]
+         +- Join Inner, (k#x = k#x)
+            :- SubqueryAlias nj1
+            :  +- CTERelationRef xxxx, true, [k#x, v1#x], false, false, 1
+            +- SubqueryAlias nj2
+               +- CTERelationRef xxxx, true, [k#x, v2#x], false, false, 1
+
+
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+     nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k, nj1.k AS qual_nj1k, nj2.k AS qual_nj2k)
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias nj1
+:     +- Project [col1#x AS k#x, col2#x AS v1#x]
+:        +- LocalRelation [col1#x, col2#x]
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias nj2
+:     +- Project [col1#x AS k#x, col2#x AS v2#x]
+:        +- LocalRelation [col1#x, col2#x]
++- Project [k#x, v1#x, v2#x, unq_k#x, qual_nj1k#x, qual_nj2k#x]
+   +- Project [k#x, v1#x, v2#x, unq_k#x, qual_nj1k#x, qual_nj2k#x]
+      +- LateralJoin lateral-subquery#x [k#x && k#x && k#x], Inner
+         :  +- SubqueryAlias __auto_generated_subquery_name
+         :     +- Project [outer(k#x) AS unq_k#x, outer(k#x) AS qual_nj1k#x, outer(k#x) AS qual_nj2k#x]
+         :        +- OneRowRelation
+         +- Project [k#x, v1#x, v2#x, k#x]
+            +- Join Inner, (k#x = k#x)
+               :- SubqueryAlias nj1
+               :  +- CTERelationRef xxxx, true, [k#x, v1#x], false, false, 1
+               +- SubqueryAlias nj2
+                  +- CTERelationRef xxxx, true, [k#x, v2#x], false, false, 1
+
+
+-- !query
+WITH uj1(k, v1) AS (VALUES (1, 'a')),
+     uj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM uj1 JOIN uj2 USING (k),
+LATERAL (SELECT k AS unq_k, uj1.k AS qual_uj1k, uj2.k AS qual_uj2k)
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias uj1
+:     +- Project [col1#x AS k#x, col2#x AS v1#x]
+:        +- LocalRelation [col1#x, col2#x]
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias uj2
+:     +- Project [col1#x AS k#x, col2#x AS v2#x]
+:        +- LocalRelation [col1#x, col2#x]
++- Project [k#x, v1#x, v2#x, unq_k#x, qual_uj1k#x, qual_uj2k#x]
+   +- Project [k#x, v1#x, v2#x, unq_k#x, qual_uj1k#x, qual_uj2k#x]
+      +- LateralJoin lateral-subquery#x [k#x && k#x && k#x], Inner
+         :  +- SubqueryAlias __auto_generated_subquery_name
+         :     +- Project [outer(k#x) AS unq_k#x, outer(k#x) AS qual_uj1k#x, outer(k#x) AS qual_uj2k#x]
+         :        +- OneRowRelation
+         +- Project [k#x, v1#x, v2#x, k#x]
+            +- Join Inner, (k#x = k#x)
+               :- SubqueryAlias uj1
+               :  +- CTERelationRef xxxx, true, [k#x, v1#x], false, false, 1
+               +- SubqueryAlias uj2
+                  +- CTERelationRef xxxx, true, [k#x, v2#x], false, false, 1
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'))
+SELECT * FROM (SELECT k FROM cte1 ORDER BY v1) sub,
+LATERAL (SELECT v1 AS leaked)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`v1`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 107,
+    "stopIndex" : 108,
+    "fragment" : "v1"
+  } ]
+}
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'), (2, 'b'), (3, 'c'))
+SELECT * FROM (SELECT k FROM cte1 GROUP BY k) g,
+LATERAL (SELECT v1 AS leaked)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`v1`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 124,
+    "stopIndex" : 125,
+    "fragment" : "v1"
+  } ]
+}
+
+
 -- !query
 DROP VIEW t1
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/join-nearest-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/join-nearest-by.sql.out
new file mode 100644
index 0000000000000..3b7d9e55ca873
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/join-nearest-by.sql.out
@@ -0,0 +1,426 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE VIEW users(user_id, score) AS VALUES (1, 10.0), (2, 20.0), (3, 30.0)
+-- !query analysis
+CreateViewCommand `spark_catalog`.`default`.`users`, [(user_id,None), (score,None)], VALUES (1, 10.0), (2, 20.0), (3, 30.0), false, false, PersistedView, COMPENSATION, true
+   +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+CREATE VIEW products(product, pscore) AS VALUES ('A', 11.0), ('B', 22.0), ('C', 5.0)
+-- !query analysis
+CreateViewCommand `spark_catalog`.`default`.`products`, [(product,None), (pscore,None)], VALUES ('A', 11.0), ('B', 22.0), ('C', 5.0), false, false, PersistedView, COMPENSATION, true
+   +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, product#x]
++- NearestByJoin Inner, true, 1, -abs((score#x - pscore#x)), NearestBySimilarity
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- SubqueryAlias spark_catalog.default.products
+         +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+            +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+               +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT *
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, score#x, product#x, pscore#x]
++- NearestByJoin Inner, true, 1, -abs((score#x - pscore#x)), NearestBySimilarity
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- SubqueryAlias spark_catalog.default.products
+         +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+            +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+               +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT *
+FROM users u LEFT OUTER JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, score#x, product#x, pscore#x]
++- NearestByJoin LeftOuter, true, 1, -abs((score#x - pscore#x)), NearestBySimilarity
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- SubqueryAlias spark_catalog.default.products
+         +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+            +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+               +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product, p.pscore
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, product#x, pscore#x]
++- NearestByJoin Inner, true, 2, abs((score#x - pscore#x)), NearestByDistance
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- SubqueryAlias spark_catalog.default.products
+         +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+            +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+               +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u INNER JOIN products p
+  EXACT NEAREST BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, product#x]
++- NearestByJoin Inner, false, 1, -abs((score#x - pscore#x)), NearestBySimilarity
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- SubqueryAlias spark_catalog.default.products
+         +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+            +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+               +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u LEFT OUTER JOIN (SELECT * FROM products WHERE false) p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, product#x]
++- NearestByJoin LeftOuter, true, 1, -abs((score#x - pscore#x)), NearestBySimilarity
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- Project [product#x, pscore#x]
+         +- Filter false
+            +- SubqueryAlias spark_catalog.default.products
+               +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+                  +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+                     +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u INNER JOIN (SELECT * FROM products WHERE false) p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, product#x]
++- NearestByJoin Inner, true, 1, -abs((score#x - pscore#x)), NearestBySimilarity
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- Project [product#x, pscore#x]
+         +- Filter false
+            +- SubqueryAlias spark_catalog.default.products
+               +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+                  +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+                     +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u INNER JOIN products p
+  APPROX NEAREST 1 BY DISTANCE abs(u.score - p.pscore)
+-- !query analysis
+Project [user_id#x, product#x]
++- NearestByJoin Inner, true, 1, abs((score#x - pscore#x)), NearestByDistance
+   :- SubqueryAlias u
+   :  +- SubqueryAlias spark_catalog.default.users
+   :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+   :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+   :           +- LocalRelation [col1#x, col2#x]
+   +- SubqueryAlias p
+      +- SubqueryAlias spark_catalog.default.products
+         +- View (`spark_catalog`.`default`.`products`, [product#x, pscore#x])
+            +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+               +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT a.user_id AS a_id, b.user_id AS b_id
+FROM users a JOIN users b
+  APPROX NEAREST 1 BY DISTANCE abs(a.score - b.score)
+ORDER BY a.user_id, b.user_id
+-- !query analysis
+Project [a_id#x, b_id#x]
++- Sort [user_id#x ASC NULLS FIRST, user_id#x ASC NULLS FIRST], true
+   +- Project [user_id#x AS a_id#x, user_id#x AS b_id#x, user_id#x, user_id#x]
+      +- NearestByJoin Inner, true, 1, abs((score#x - score#x)), NearestByDistance
+         :- SubqueryAlias a
+         :  +- SubqueryAlias spark_catalog.default.users
+         :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+         :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+         :           +- LocalRelation [col1#x, col2#x]
+         +- SubqueryAlias b
+            +- SubqueryAlias spark_catalog.default.users
+               +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+                  +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+                     +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u RIGHT OUTER JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "joinType" : "RIGHT OUTER",
+    "supported" : "'INNER', 'LEFT OUTER'"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 126,
+    "fragment" : "RIGHT OUTER JOIN products p\n  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 0 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "max" : "100000",
+    "min" : "1",
+    "numResults" : "0"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 114,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 0 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 100001 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "max" : "100000",
+    "min" : "1",
+    "numResults" : "100001"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 119,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 100001 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY map(u.score, p.pscore)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "NEAREST_BY_JOIN.NON_ORDERABLE_RANKING_EXPRESSION",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "expression" : "\"map(score, pscore)\"",
+    "type" : "\"MAP<DECIMAL(3,1), DECIMAL(3,1)>\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 112,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 1 BY SIMILARITY map(u.score, p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    EXACT NEAREST 1 BY SIMILARITY rand() + p.pscore
+)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    APPROX NEAREST 1 BY SIMILARITY rand() + p.pscore
+)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    APPROX NEAREST 2 BY DISTANCE rand() + p.pscore
+)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY rand(0) + p.pscore
+-- !query analysis
+ExplainCommand 'Project ['u.user_id, 'p.product], SimpleMode
+
+
+-- !query
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+WHERE u.user_id > 1
+-- !query analysis
+ExplainCommand 'Project ['u.user_id, 'p.product], SimpleMode
+
+
+-- !query
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+WHERE p.product != 'C'
+-- !query analysis
+ExplainCommand 'Project ['u.user_id, 'p.product], SimpleMode
+
+
+-- !query
+SET spark.sql.crossJoin.enabled = false
+-- !query analysis
+SetCommand (spark.sql.crossJoin.enabled,Some(false))
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED",
+  "sqlState" : "42604",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 114,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SET spark.sql.crossJoin.enabled = true
+-- !query analysis
+SetCommand (spark.sql.crossJoin.enabled,Some(true))
+
+
+-- !query
+CREATE OR REPLACE TEMP VIEW tied_products(product, pscore)
+  AS VALUES ('A', 10.0), ('B', 10.0), ('C', 10.0)
+-- !query analysis
+CreateViewCommand `tied_products`, [(product,None), (pscore,None)], VALUES ('A', 10.0), ('B', 10.0), ('C', 10.0), false, true, LocalTempView, UNSUPPORTED, true
+   +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT u.user_id, COUNT(*) AS num_matches
+FROM users u JOIN tied_products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+GROUP BY u.user_id
+ORDER BY u.user_id
+-- !query analysis
+Sort [user_id#x ASC NULLS FIRST], true
++- Aggregate [user_id#x], [user_id#x, count(1) AS num_matches#xL]
+   +- NearestByJoin Inner, true, 2, abs((score#x - pscore#x)), NearestByDistance
+      :- SubqueryAlias u
+      :  +- SubqueryAlias spark_catalog.default.users
+      :     +- View (`spark_catalog`.`default`.`users`, [user_id#x, score#x])
+      :        +- Project [cast(col1#x as int) AS user_id#x, cast(col2#x as decimal(3,1)) AS score#x]
+      :           +- LocalRelation [col1#x, col2#x]
+      +- SubqueryAlias p
+         +- SubqueryAlias tied_products
+            +- View (`tied_products`, [product#x, pscore#x])
+               +- Project [cast(col1#x as string) AS product#x, cast(col2#x as decimal(3,1)) AS pscore#x]
+                  +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+DROP VIEW tied_products
+-- !query analysis
+DropTempViewCommand tied_products, false
+
+
+-- !query
+DROP VIEW users
+-- !query analysis
+DropTableCommand `spark_catalog`.`default`.`users`, false, true, false
+
+
+-- !query
+DROP VIEW products
+-- !query analysis
+DropTableCommand `spark_catalog`.`default`.`products`, false, true, false
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out
new file mode 100644
index 0000000000000..4b5ed0896741f
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence-legacy.sql.out
@@ -0,0 +1,127 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user
+-- !query analysis
+CreateViewCommand `v_user`, SELECT 'admin.admin' AS current_user, false, true, LocalTempView, UNSUPPORTED, true
+   +- Project [admin.admin AS current_user#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time
+-- !query analysis
+CreateViewCommand `v_time`, SELECT CAST(0 AS INT) AS current_time, false, true, LocalTempView, UNSUPPORTED, true
+   +- Project [cast(0 as int) AS current_time#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT current_user FROM v_user
+-- !query analysis
+Project [current_user#x]
++- SubqueryAlias v_user
+   +- View (`v_user`, [current_user#x])
+      +- Project [cast(current_user#x as string) AS current_user#x]
+         +- Project [admin.admin AS current_user#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT current_time FROM v_time
+-- !query analysis
+Project [current_time#x]
++- SubqueryAlias v_time
+   +- View (`v_time`, [current_time#x])
+      +- Project [cast(current_time#x as int) AS current_time#x]
+         +- Project [cast(0 as int) AS current_time#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT 'abc' AS current_user, current_user = current_user() AS function_won
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT (SELECT current_user) = current_user() AS function_won FROM v_user
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+DECLARE current_user = 'abc'
+-- !query analysis
+CreateVariable default(abc, sql=''abc''), false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user
+
+
+-- !query
+SELECT current_user, current_user FROM v_user
+-- !query analysis
+Project [current_user#x, current_user#x]
++- SubqueryAlias v_user
+   +- View (`v_user`, [current_user#x])
+      +- Project [cast(current_user#x as string) AS current_user#x]
+         +- Project [admin.admin AS current_user#x]
+            +- OneRowRelation
+
+
+-- !query
+DROP TEMPORARY VARIABLE current_user
+-- !query analysis
+DropVariable false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_date)
+SELECT typeof((SELECT current_date)) FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_timestamp)
+SELECT typeof((SELECT current_timestamp)) FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS user)
+SELECT (SELECT user) = current_user() AS function_won FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS session_user)
+SELECT (SELECT session_user) = current_user() AS function_won FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user))
+-- !query analysis
+Aggregate [current_user#x, spark_grouping_id#xL], [typeof(spark_grouping_id#xL) AS typeof(grouping_id())#x]
++- Expand [[current_user#x, current_user#x, 0]], [current_user#x, current_user#x, spark_grouping_id#xL]
+   +- Project [current_user#x, current_user#x AS current_user#x]
+      +- SubqueryAlias v_user
+         +- View (`v_user`, [current_user#x])
+            +- Project [cast(current_user#x as string) AS current_user#x]
+               +- Project [admin.admin AS current_user#x]
+                  +- OneRowRelation
+
+
+-- !query
+DROP VIEW v_user
+-- !query analysis
+DropTempViewCommand v_user, false
+
+
+-- !query
+DROP VIEW v_time
+-- !query analysis
+DropTempViewCommand v_time, false
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out
new file mode 100644
index 0000000000000..4b5ed0896741f
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/parameterless-function-name-precedence.sql.out
@@ -0,0 +1,127 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user
+-- !query analysis
+CreateViewCommand `v_user`, SELECT 'admin.admin' AS current_user, false, true, LocalTempView, UNSUPPORTED, true
+   +- Project [admin.admin AS current_user#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time
+-- !query analysis
+CreateViewCommand `v_time`, SELECT CAST(0 AS INT) AS current_time, false, true, LocalTempView, UNSUPPORTED, true
+   +- Project [cast(0 as int) AS current_time#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT current_user FROM v_user
+-- !query analysis
+Project [current_user#x]
++- SubqueryAlias v_user
+   +- View (`v_user`, [current_user#x])
+      +- Project [cast(current_user#x as string) AS current_user#x]
+         +- Project [admin.admin AS current_user#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT current_time FROM v_time
+-- !query analysis
+Project [current_time#x]
++- SubqueryAlias v_time
+   +- View (`v_time`, [current_time#x])
+      +- Project [cast(current_time#x as int) AS current_time#x]
+         +- Project [cast(0 as int) AS current_time#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT 'abc' AS current_user, current_user = current_user() AS function_won
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT (SELECT current_user) = current_user() AS function_won FROM v_user
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+DECLARE current_user = 'abc'
+-- !query analysis
+CreateVariable default(abc, sql=''abc''), false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user
+
+
+-- !query
+SELECT current_user, current_user FROM v_user
+-- !query analysis
+Project [current_user#x, current_user#x]
++- SubqueryAlias v_user
+   +- View (`v_user`, [current_user#x])
+      +- Project [cast(current_user#x as string) AS current_user#x]
+         +- Project [admin.admin AS current_user#x]
+            +- OneRowRelation
+
+
+-- !query
+DROP TEMPORARY VARIABLE current_user
+-- !query analysis
+DropVariable false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.current_user
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_date)
+SELECT typeof((SELECT current_date)) FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_timestamp)
+SELECT typeof((SELECT current_timestamp)) FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS user)
+SELECT (SELECT user) = current_user() AS function_won FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS session_user)
+SELECT (SELECT session_user) = current_user() AS function_won FROM t1
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user))
+-- !query analysis
+Aggregate [current_user#x, spark_grouping_id#xL], [typeof(spark_grouping_id#xL) AS typeof(grouping_id())#x]
++- Expand [[current_user#x, current_user#x, 0]], [current_user#x, current_user#x, spark_grouping_id#xL]
+   +- Project [current_user#x, current_user#x AS current_user#x]
+      +- SubqueryAlias v_user
+         +- View (`v_user`, [current_user#x])
+            +- Project [cast(current_user#x as string) AS current_user#x]
+               +- Project [admin.admin AS current_user#x]
+                  +- OneRowRelation
+
+
+-- !query
+DROP VIEW v_user
+-- !query analysis
+DropTempViewCommand v_user, false
+
+
+-- !query
+DROP VIEW v_time
+-- !query analysis
+DropTempViewCommand v_time, false
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out
index 84ec13334ffd0..a6a86f9ebe1dd 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out
@@ -1979,7 +1979,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 table t
 |> tablesample (100 percent) repeatable (0)
 -- !query analysis
-Sample 0.0, 1.0, false, 0
+Sample 0.0, 1.0, false, 0, Bernoulli
 +- SubqueryAlias spark_catalog.default.t
    +- Relation spark_catalog.default.t[x#x,y#x] csv
 
@@ -1998,7 +1998,7 @@ GlobalLimit 2
 table t
 |> tablesample (bucket 1 out of 1) repeatable (0)
 -- !query analysis
-Sample 0.0, 1.0, false, 0
+Sample 0.0, 1.0, false, 0, Bernoulli
 +- SubqueryAlias spark_catalog.default.t
    +- Relation spark_catalog.default.t[x#x,y#x] csv
 
@@ -2009,10 +2009,10 @@ table t
 |> tablesample (5 rows) repeatable (0)
 |> tablesample (bucket 1 out of 1) repeatable (0)
 -- !query analysis
-Sample 0.0, 1.0, false, 0
+Sample 0.0, 1.0, false, 0, Bernoulli
 +- GlobalLimit 5
    +- LocalLimit 5
-      +- Sample 0.0, 1.0, false, 0
+      +- Sample 0.0, 1.0, false, 0, Bernoulli
          +- SubqueryAlias spark_catalog.default.t
             +- Relation spark_catalog.default.t[x#x,y#x] csv
 
@@ -2435,7 +2435,7 @@ Project [a#x]
    :        +- Project [a#x]
    :           +- SubqueryAlias grouping
    :              +- LocalRelation [a#x]
-   +- Sample 0.0, 1.0, false, 0
+   +- Sample 0.0, 1.0, false, 0, Bernoulli
       +- SubqueryAlias jt2
          +- SubqueryAlias join_test_t2
             +- View (`join_test_t2`, [a#x])
@@ -2458,7 +2458,7 @@ Project [a#x]
    :           +- SubqueryAlias grouping
    :              +- LocalRelation [a#x]
    +- SubqueryAlias jt2
-      +- Sample 0.0, 1.0, false, 0
+      +- Sample 0.0, 1.0, false, 0, Bernoulli
          +- Project [1 AS a#x]
             +- OneRowRelation
 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
index 6d159f4c9ee02..95627092dca2f 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
@@ -262,7 +262,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v1`, SELECT * FROM base_tabl
 DESC TABLE EXTENDED v1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v1`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v1, `spark_catalog`.`temp_view_test`.`v1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v1, spark_catalog.temp_view_test.v1
 
 
 -- !query
@@ -310,7 +310,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v2`, SELECT * FROM base_tabl
 DESC TABLE EXTENDED temp_view_test.v2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v2`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v2, `spark_catalog`.`temp_view_test`.`v2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v2, spark_catalog.temp_view_test.v2
 
 
 -- !query
@@ -353,7 +353,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v3`, SELECT t1.a AS t1_a, t2
 DESC TABLE EXTENDED v3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v3`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v3, `spark_catalog`.`temp_view_test`.`v3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v3, spark_catalog.temp_view_test.v3
 
 
 -- !query
@@ -411,7 +411,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v4`, SELECT * FROM base_tabl
 DESC TABLE EXTENDED v4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v4`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v4, `spark_catalog`.`temp_view_test`.`v4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v4, spark_catalog.temp_view_test.v4
 
 
 -- !query
@@ -433,7 +433,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v5`, SELECT t1.id, t2.a FROM
 DESC TABLE EXTENDED v5
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v5`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v5, `spark_catalog`.`temp_view_test`.`v5`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v5, spark_catalog.temp_view_test.v5
 
 
 -- !query
@@ -453,7 +453,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v6`, SELECT * FROM base_tabl
 DESC TABLE EXTENDED v6
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v6`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v6, `spark_catalog`.`temp_view_test`.`v6`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v6, spark_catalog.temp_view_test.v6
 
 
 -- !query
@@ -473,7 +473,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v7`, SELECT * FROM base_tabl
 DESC TABLE EXTENDED v7
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v7`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v7, `spark_catalog`.`temp_view_test`.`v7`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v7, spark_catalog.temp_view_test.v7
 
 
 -- !query
@@ -492,7 +492,7 @@ CreateViewCommand `spark_catalog`.`temp_view_test`.`v8`, SELECT * FROM base_tabl
 DESC TABLE EXTENDED v8
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`temp_view_test`.`v8`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v8, `spark_catalog`.`temp_view_test`.`v8`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), temp_view_test.v8, spark_catalog.temp_view_test.v8
 
 
 -- !query
@@ -675,7 +675,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp1`, SELECT * FROM t1 C
 DESC TABLE EXTENDED nontemp1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`nontemp1`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp1, `spark_catalog`.`testviewschm2`.`nontemp1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp1, spark_catalog.testviewschm2.nontemp1
 
 
 -- !query
@@ -710,7 +710,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp2`, SELECT * FROM t1 I
 DESC TABLE EXTENDED nontemp2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`nontemp2`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp2, `spark_catalog`.`testviewschm2`.`nontemp2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp2, spark_catalog.testviewschm2.nontemp2
 
 
 -- !query
@@ -745,7 +745,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp3`, SELECT * FROM t1 L
 DESC TABLE EXTENDED nontemp3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`nontemp3`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp3, `spark_catalog`.`testviewschm2`.`nontemp3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp3, spark_catalog.testviewschm2.nontemp3
 
 
 -- !query
@@ -780,7 +780,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp4`, SELECT * FROM t1 L
 DESC TABLE EXTENDED nontemp4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`nontemp4`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp4, `spark_catalog`.`testviewschm2`.`nontemp4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.nontemp4, spark_catalog.testviewschm2.nontemp4
 
 
 -- !query
@@ -885,7 +885,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f), false, false,
 DESC TABLE EXTENDED pubview
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`pubview`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.pubview, `spark_catalog`.`testviewschm2`.`pubview`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.pubview, spark_catalog.testviewschm2.pubview
 
 
 -- !query
@@ -928,7 +928,7 @@ AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j), false
 DESC TABLE EXTENDED mytempview
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`mytempview`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.mytempview, `spark_catalog`.`testviewschm2`.`mytempview`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.mytempview, spark_catalog.testviewschm2.mytempview
 
 
 -- !query
@@ -1074,28 +1074,28 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`aliased_view_4`, select * fro
 DESC TABLE aliased_view_1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, `spark_catalog`.`testviewschm2`.`aliased_view_1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, spark_catalog.testviewschm2.aliased_view_1
 
 
 -- !query
 DESC TABLE aliased_view_2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, `spark_catalog`.`testviewschm2`.`aliased_view_2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, spark_catalog.testviewschm2.aliased_view_2
 
 
 -- !query
 DESC TABLE aliased_view_3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, `spark_catalog`.`testviewschm2`.`aliased_view_3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, spark_catalog.testviewschm2.aliased_view_3
 
 
 -- !query
 DESC TABLE aliased_view_4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, `spark_catalog`.`testviewschm2`.`aliased_view_4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, spark_catalog.testviewschm2.aliased_view_4
 
 
 -- !query
@@ -1108,28 +1108,28 @@ AlterTableRenameCommand `spark_catalog`.`testviewschm2`.`tx1`, `a1`, false
 DESC TABLE aliased_view_1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, `spark_catalog`.`testviewschm2`.`aliased_view_1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, spark_catalog.testviewschm2.aliased_view_1
 
 
 -- !query
 DESC TABLE aliased_view_2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, `spark_catalog`.`testviewschm2`.`aliased_view_2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, spark_catalog.testviewschm2.aliased_view_2
 
 
 -- !query
 DESC TABLE aliased_view_3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, `spark_catalog`.`testviewschm2`.`aliased_view_3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, spark_catalog.testviewschm2.aliased_view_3
 
 
 -- !query
 DESC TABLE aliased_view_4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, `spark_catalog`.`testviewschm2`.`aliased_view_4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, spark_catalog.testviewschm2.aliased_view_4
 
 
 -- !query
@@ -1142,28 +1142,28 @@ AlterTableRenameCommand `spark_catalog`.`testviewschm2`.`tt1`, `a2`, false
 DESC TABLE aliased_view_1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, `spark_catalog`.`testviewschm2`.`aliased_view_1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, spark_catalog.testviewschm2.aliased_view_1
 
 
 -- !query
 DESC TABLE aliased_view_2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, `spark_catalog`.`testviewschm2`.`aliased_view_2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, spark_catalog.testviewschm2.aliased_view_2
 
 
 -- !query
 DESC TABLE aliased_view_3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, `spark_catalog`.`testviewschm2`.`aliased_view_3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, spark_catalog.testviewschm2.aliased_view_3
 
 
 -- !query
 DESC TABLE aliased_view_4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, `spark_catalog`.`testviewschm2`.`aliased_view_4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, spark_catalog.testviewschm2.aliased_view_4
 
 
 -- !query
@@ -1176,28 +1176,28 @@ AlterTableRenameCommand `spark_catalog`.`testviewschm2`.`a1`, `tt1`, false
 DESC TABLE aliased_view_1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, `spark_catalog`.`testviewschm2`.`aliased_view_1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_1, spark_catalog.testviewschm2.aliased_view_1
 
 
 -- !query
 DESC TABLE aliased_view_2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, `spark_catalog`.`testviewschm2`.`aliased_view_2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_2, spark_catalog.testviewschm2.aliased_view_2
 
 
 -- !query
 DESC TABLE aliased_view_3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, `spark_catalog`.`testviewschm2`.`aliased_view_3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_3, spark_catalog.testviewschm2.aliased_view_3
 
 
 -- !query
 DESC TABLE aliased_view_4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`aliased_view_4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, `spark_catalog`.`testviewschm2`.`aliased_view_4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.aliased_view_4, spark_catalog.testviewschm2.aliased_view_4
 
 
 -- !query
@@ -1334,35 +1334,35 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`v3`, select * from tt2 join t
 DESC TABLE v1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1, `spark_catalog`.`testviewschm2`.`v1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1, spark_catalog.testviewschm2.v1
 
 
 -- !query
 DESC TABLE v1a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v1a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1a, `spark_catalog`.`testviewschm2`.`v1a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1a, spark_catalog.testviewschm2.v1a
 
 
 -- !query
 DESC TABLE v2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2, `spark_catalog`.`testviewschm2`.`v2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2, spark_catalog.testviewschm2.v2
 
 
 -- !query
 DESC TABLE v2a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v2a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2a, `spark_catalog`.`testviewschm2`.`v2a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2a, spark_catalog.testviewschm2.v2a
 
 
 -- !query
 DESC TABLE v3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v3, `spark_catalog`.`testviewschm2`.`v3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v3, spark_catalog.testviewschm2.v3
 
 
 -- !query
@@ -1381,35 +1381,35 @@ AlterTableAddColumnsCommand `spark_catalog`.`testviewschm2`.`tt2`, [StructField(
 DESC TABLE v1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1, `spark_catalog`.`testviewschm2`.`v1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1, spark_catalog.testviewschm2.v1
 
 
 -- !query
 DESC TABLE v1a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v1a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1a, `spark_catalog`.`testviewschm2`.`v1a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1a, spark_catalog.testviewschm2.v1a
 
 
 -- !query
 DESC TABLE v2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2, `spark_catalog`.`testviewschm2`.`v2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2, spark_catalog.testviewschm2.v2
 
 
 -- !query
 DESC TABLE v2a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v2a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2a, `spark_catalog`.`testviewschm2`.`v2a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2a, spark_catalog.testviewschm2.v2a
 
 
 -- !query
 DESC TABLE v3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v3, `spark_catalog`.`testviewschm2`.`v3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v3, spark_catalog.testviewschm2.v3
 
 
 -- !query
@@ -1441,35 +1441,35 @@ AlterTableAddColumnsCommand `spark_catalog`.`testviewschm2`.`tt3`, [StructField(
 DESC TABLE v1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1, `spark_catalog`.`testviewschm2`.`v1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1, spark_catalog.testviewschm2.v1
 
 
 -- !query
 DESC TABLE v1a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v1a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1a, `spark_catalog`.`testviewschm2`.`v1a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v1a, spark_catalog.testviewschm2.v1a
 
 
 -- !query
 DESC TABLE v2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2, `spark_catalog`.`testviewschm2`.`v2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2, spark_catalog.testviewschm2.v2
 
 
 -- !query
 DESC TABLE v2a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v2a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2a, `spark_catalog`.`testviewschm2`.`v2a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v2a, spark_catalog.testviewschm2.v2a
 
 
 -- !query
 DESC TABLE v3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`v3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v3, `spark_catalog`.`testviewschm2`.`v3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.v3, spark_catalog.testviewschm2.v3
 
 
 -- !query
@@ -1502,7 +1502,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`vv1`, select * from (tt5 cros
 DESC TABLE vv1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv1, `spark_catalog`.`testviewschm2`.`vv1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv1, spark_catalog.testviewschm2.vv1
 
 
 -- !query
@@ -1515,7 +1515,7 @@ AlterTableAddColumnsCommand `spark_catalog`.`testviewschm2`.`tt5`, [StructField(
 DESC TABLE vv1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv1, `spark_catalog`.`testviewschm2`.`vv1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv1, spark_catalog.testviewschm2.vv1
 
 
 -- !query
@@ -1528,7 +1528,7 @@ AlterTableAddColumnsCommand `spark_catalog`.`testviewschm2`.`tt5`, [StructField(
 DESC TABLE vv1
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv1`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv1, `spark_catalog`.`testviewschm2`.`vv1`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv1, spark_catalog.testviewschm2.vv1
 
 
 -- !query
@@ -1574,7 +1574,7 @@ select * from tt7 full join tt8 using (x), tt8 tt8x, false, false, PersistedView
 DESC TABLE vv2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv2, `spark_catalog`.`testviewschm2`.`vv2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv2, spark_catalog.testviewschm2.vv2
 
 
 -- !query
@@ -1617,7 +1617,7 @@ select * from
 DESC TABLE vv3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv3, `spark_catalog`.`testviewschm2`.`vv3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv3, spark_catalog.testviewschm2.vv3
 
 
 -- !query
@@ -1665,7 +1665,7 @@ select * from
 DESC TABLE vv4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv4, `spark_catalog`.`testviewschm2`.`vv4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv4, spark_catalog.testviewschm2.vv4
 
 
 -- !query
@@ -1690,21 +1690,21 @@ AlterTableAddColumnsCommand `spark_catalog`.`testviewschm2`.`tt8`, [StructField(
 DESC TABLE vv2
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv2`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv2, `spark_catalog`.`testviewschm2`.`vv2`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv2, spark_catalog.testviewschm2.vv2
 
 
 -- !query
 DESC TABLE vv3
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv3`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv3, `spark_catalog`.`testviewschm2`.`vv3`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv3, spark_catalog.testviewschm2.vv3
 
 
 -- !query
 DESC TABLE vv4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv4, `spark_catalog`.`testviewschm2`.`vv4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv4, spark_catalog.testviewschm2.vv4
 
 
 -- !query
@@ -1751,14 +1751,14 @@ select * from tt7a left join tt8a using (x), tt8a tt8ax, false, false, Persisted
 DESC TABLE vv4
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv4`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv4, `spark_catalog`.`testviewschm2`.`vv4`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv4, spark_catalog.testviewschm2.vv4
 
 
 -- !query
 DESC TABLE vv2a
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv2a`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv2a, `spark_catalog`.`testviewschm2`.`vv2a`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv2a, spark_catalog.testviewschm2.vv2a
 
 
 -- !query
@@ -1790,14 +1790,14 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`vv5`, select x,y,z from tt9 j
 DESC TABLE vv5
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv5`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv5, `spark_catalog`.`testviewschm2`.`vv5`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv5, spark_catalog.testviewschm2.vv5
 
 
 -- !query
 DESC TABLE vv5
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv5`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv5, `spark_catalog`.`testviewschm2`.`vv5`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv5, spark_catalog.testviewschm2.vv5
 
 
 -- !query
@@ -1841,7 +1841,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`vv6`, select x,y,z,q from
 DESC TABLE vv6
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv6`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv6, `spark_catalog`.`testviewschm2`.`vv6`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv6, spark_catalog.testviewschm2.vv6
 
 
 -- !query
@@ -1854,7 +1854,7 @@ AlterTableAddColumnsCommand `spark_catalog`.`testviewschm2`.`tt11`, [StructField
 DESC TABLE vv6
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`vv6`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv6, `spark_catalog`.`testviewschm2`.`vv6`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.vv6, spark_catalog.testviewschm2.vv6
 
 
 -- !query
@@ -1887,7 +1887,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`tt18v`, select * from int8_tb
 DESC TABLE tt18v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`tt18v`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt18v, `spark_catalog`.`testviewschm2`.`tt18v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt18v, spark_catalog.testviewschm2.tt18v
 
 
 -- !query
@@ -1908,7 +1908,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`tt21v`, select * from tt5 nat
 DESC TABLE tt21v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`tt21v`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt21v, `spark_catalog`.`testviewschm2`.`tt21v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt21v, spark_catalog.testviewschm2.tt21v
 
 
 -- !query
@@ -1929,7 +1929,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`tt22v`, select * from tt5 nat
 DESC TABLE tt22v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`tt22v`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt22v, `spark_catalog`.`testviewschm2`.`tt22v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt22v, spark_catalog.testviewschm2.tt22v
 
 
 -- !query
@@ -1954,7 +1954,7 @@ select 42, 43, false, false, PersistedView, COMPENSATION, true
 DESC TABLE tt23v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`testviewschm2`.`tt23v`, false, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt23v, `spark_catalog`.`testviewschm2`.`tt23v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), testviewschm2.tt23v, spark_catalog.testviewschm2.tt23v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 4362eeb09bf66..33435514b6e2c 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -507,7 +507,7 @@ Project [c1#x, c2#x, c3#x, c4#x, c5#x]
 +- LateralJoin lateral-subquery#x [c1#x && c2#x && c3#x && c4#x && c5#x], Inner
    :  +- SubqueryAlias T
    :     +- Project [c1#x AS c1#x, c2#x AS c2#x, c3#x AS c3#x, c4#x AS c4#x, c5#x AS c5#x]
-   :        +- Project [outer(c1#x), outer(c2#x), outer(c3#x), outer(c4#x), outer(c5#x)]
+   :        +- Project [outer(c1#x) AS c1#x, outer(c2#x) AS c2#x, outer(c3#x) AS c3#x, outer(c4#x) AS c4#x, outer(c5#x) AS c5#x]
    :           +- OneRowRelation
    +- SubqueryAlias v1
       +- View (`v1`, [c1#x, c2#x, c3#x, c4#x, c5#x])
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out
new file mode 100644
index 0000000000000..793b38a6172a4
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out
@@ -0,0 +1,1067 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = Spark_Catalog.Default, System.Builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(Spark_Catalog, Default)), SchemaInPath(List(System, Builtin))]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.`sch.b`, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sch.b)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = SYSTEM_PATH
+-- !query analysis
+SetPathCommand [SystemPath]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+USE spark_catalog.default
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default]
+
+
+-- !query
+SET PATH = SYSTEM_PATH, CURRENT_SCHEMA
+-- !query analysis
+SetPathCommand [SystemPath, CurrentSchema]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SET PATH = PATH, system.session
+-- !query analysis
+SetPathCommand [PathRef, SchemaInPath(List(system, session))]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+USE spark_catalog.default
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default]
+
+
+-- !query
+SET PATH = current_schema, system.builtin
+-- !query analysis
+SetPathCommand [CurrentSchema, SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = current_database, system.builtin
+-- !query analysis
+SetPathCommand [CurrentSchema, SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT current_path()
+-- !query analysis
+Project [current_path() AS current_path()#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT CURRENT_PATH = current_path() AS ansi_form_matches
+-- !query analysis
+Project [(current_path() = current_path()) AS ansi_form_matches#x]
++- OneRowRelation
+
+
+-- !query
+SELECT current_path(1)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "1",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "0",
+    "functionName" : "`current_path`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 22,
+    "fragment" : "current_path(1)"
+  } ]
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SET PATH = spark_catalog.default, spark_catalog.default
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "spark_catalog.default"
+  }
+}
+
+
+-- !query
+SET PATH = spark_catalog.DEFAULT, spark_catalog.default
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "spark_catalog.default"
+  }
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH, system.builtin
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "system.builtin"
+  }
+}
+
+
+-- !query
+SET PATH = SYSTEM_PATH, SYSTEM_PATH
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "system.builtin"
+  }
+}
+
+
+-- !query
+SET PATH = current_database, current_schema
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "current_schema"
+  }
+}
+
+
+-- !query
+SET PATH = my_schema_no_catalog
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "qualifiedName" : "my_schema_no_catalog"
+  }
+}
+
+
+-- !query
+CREATE SCHEMA sql_path_routines
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines]
+
+
+-- !query
+CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.sql_path_routines.pick, INT, 7, false, false, false, false
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT pick()
+-- !query analysis
+Project [spark_catalog.sql_path_routines.pick() AS spark_catalog.sql_path_routines.pick()#x]
++- Project
+   +- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+CREATE FUNCTION sql_path_routines.pick_tvf()
+RETURNS TABLE(val INT)
+RETURN SELECT 7 AS val
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.sql_path_routines.pick_tvf, val INT, SELECT 7 AS val, true, false, false, false
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT * FROM pick_tvf()
+-- !query analysis
+Project [val#x]
++- SQLFunctionNode spark_catalog.sql_path_routines.pick_tvf
+   +- SubqueryAlias pick_tvf
+      +- Project [cast(val#x as int) AS val#x]
+         +- Project [7 AS val#x]
+            +- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+CREATE SCHEMA sql_path_routines_b
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines_b]
+
+
+-- !query
+CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.sql_path_routines_b.pick, INT, 11, false, false, false, false
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, sql_path_routines_b)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT pick() AS from_first_schema
+-- !query analysis
+Project [spark_catalog.sql_path_routines.pick() AS from_first_schema#x]
++- Project
+   +- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines_b)), SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT pick() AS from_first_schema
+-- !query analysis
+Project [spark_catalog.sql_path_routines_b.pick() AS from_first_schema#x]
++- Project
+   +- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT pick()
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_ROUTINE",
+  "sqlState" : "42883",
+  "messageParameters" : {
+    "routineName" : "`pick`",
+    "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 13,
+    "fragment" : "pick()"
+  } ]
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+DROP FUNCTION sql_path_routines.pick
+-- !query analysis
+DropFunctionCommand spark_catalog.sql_path_routines.pick, false, false
+
+
+-- !query
+DROP FUNCTION sql_path_routines.pick_tvf
+-- !query analysis
+DropFunctionCommand spark_catalog.sql_path_routines.pick_tvf, false, false
+
+
+-- !query
+DROP FUNCTION sql_path_routines_b.pick
+-- !query analysis
+DropFunctionCommand spark_catalog.sql_path_routines_b.pick, false, false
+
+
+-- !query
+DROP SCHEMA sql_path_routines
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines]
+
+
+-- !query
+DROP SCHEMA sql_path_routines_b
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines_b]
+
+
+-- !query
+CREATE SCHEMA sql_path_relations_a
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_a]
+
+
+-- !query
+CREATE SCHEMA sql_path_relations_b
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_b]
+
+
+-- !query
+CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations_a`.`tbl`, ErrorIfExists, [id]
+   +- Project [1 AS id#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations_b`.`tbl`, ErrorIfExists, [id]
+   +- Project [2 AS id#x]
+      +- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations_a)), SchemaInPath(List(spark_catalog, sql_path_relations_b)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT id FROM tbl AS from_first_schema
+-- !query analysis
+Project [id#x]
++- SubqueryAlias from_first_schema
+   +- SubqueryAlias spark_catalog.sql_path_relations_a.tbl
+      +- Relation spark_catalog.sql_path_relations_a.tbl[id#x] parquet
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations_b)), SchemaInPath(List(spark_catalog, sql_path_relations_a)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT id FROM tbl AS from_first_schema
+-- !query analysis
+Project [id#x]
++- SubqueryAlias from_first_schema
+   +- SubqueryAlias spark_catalog.sql_path_relations_b.tbl
+      +- Relation spark_catalog.sql_path_relations_b.tbl[id#x] parquet
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT id FROM tbl
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "TABLE_OR_VIEW_NOT_FOUND",
+  "sqlState" : "42P01",
+  "messageParameters" : {
+    "relationName" : "`tbl`",
+    "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 16,
+    "stopIndex" : 18,
+    "fragment" : "tbl"
+  } ]
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+DROP TABLE sql_path_relations_a.tbl
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations_a.tbl
+
+
+-- !query
+DROP TABLE sql_path_relations_b.tbl
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations_b.tbl
+
+
+-- !query
+DROP SCHEMA sql_path_relations_a
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_a]
+
+
+-- !query
+DROP SCHEMA sql_path_relations_b
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_b]
+
+
+-- !query
+CREATE SCHEMA sql_path_views_a
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a]
+
+
+-- !query
+CREATE SCHEMA sql_path_views_b
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b]
+
+
+-- !query
+CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_views_a`.`frozen_t`, ErrorIfExists, [id]
+   +- Project [1 AS id#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_views_b`.`frozen_t`, ErrorIfExists, [id]
+   +- Project [2 AS id#x]
+      +- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_views_a, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_views_a)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t
+-- !query analysis
+CreateViewCommand `spark_catalog`.`default`.`v_path_frozen`, SELECT id FROM frozen_t, false, false, PersistedView, COMPENSATION, true
+   +- Project [id#x]
+      +- SubqueryAlias spark_catalog.sql_path_views_a.frozen_t
+         +- Relation spark_catalog.sql_path_views_a.frozen_t[id#x] parquet
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_views_b, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_views_b)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT id FROM frozen_t AS bare_lookup_uses_live_path
+-- !query analysis
+Project [id#x]
++- SubqueryAlias bare_lookup_uses_live_path
+   +- SubqueryAlias spark_catalog.sql_path_views_b.frozen_t
+      +- Relation spark_catalog.sql_path_views_b.frozen_t[id#x] parquet
+
+
+-- !query
+SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path
+-- !query analysis
+Project [id#x]
++- SubqueryAlias view_body_uses_frozen_path
+   +- SubqueryAlias spark_catalog.default.v_path_frozen
+      +- View (`spark_catalog`.`default`.`v_path_frozen`, [id#x])
+         +- Project [cast(id#x as int) AS id#x]
+            +- Project [id#x]
+               +- SubqueryAlias spark_catalog.sql_path_views_a.frozen_t
+                  +- Relation spark_catalog.sql_path_views_a.frozen_t[id#x] parquet
+
+
+-- !query
+USE spark_catalog.sql_path_views_a
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a]
+
+
+-- !query
+CREATE VIEW sql_path_views_a.v_ctx AS
+SELECT current_schema() AS cs, current_path() AS cp
+-- !query analysis
+CreateViewCommand `spark_catalog`.`sql_path_views_a`.`v_ctx`, SELECT current_schema() AS cs, current_path() AS cp, false, false, PersistedView, COMPENSATION, true
+   +- Project [current_schema() AS cs#x, current_path() AS cp#x]
+      +- OneRowRelation
+
+
+-- !query
+USE spark_catalog.sql_path_views_b
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b]
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SELECT cs, cp FROM sql_path_views_a.v_ctx
+-- !query analysis
+Project [cs#x, cp#x]
++- SubqueryAlias spark_catalog.sql_path_views_a.v_ctx
+   +- View (`spark_catalog`.`sql_path_views_a`.`v_ctx`, [cs#x, cp#x])
+      +- Project [cast(cs#x as string) AS cs#x, cast(cp#x as string) AS cp#x]
+         +- Project [current_schema() AS cs#x, current_path() AS cp#x]
+            +- OneRowRelation
+
+
+-- !query
+USE spark_catalog.default
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default]
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+DROP VIEW default.v_path_frozen
+-- !query analysis
+DropTableCommand `spark_catalog`.`default`.`v_path_frozen`, false, true, false
+
+
+-- !query
+DROP VIEW sql_path_views_a.v_ctx
+-- !query analysis
+DropTableCommand `spark_catalog`.`sql_path_views_a`.`v_ctx`, false, true, false
+
+
+-- !query
+DROP TABLE sql_path_views_a.frozen_t
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_views_a.frozen_t
+
+
+-- !query
+DROP TABLE sql_path_views_b.frozen_t
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_views_b.frozen_t
+
+
+-- !query
+DROP SCHEMA sql_path_views_a
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a]
+
+
+-- !query
+DROP SCHEMA sql_path_views_b
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b]
+
+
+-- !query
+CREATE SCHEMA sql_path_fn_a
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a]
+
+
+-- !query
+CREATE SCHEMA sql_path_fn_b
+-- !query analysis
+CreateNamespace false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b]
+
+
+-- !query
+CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_fn_a`.`frozen_t`, ErrorIfExists, [id]
+   +- Project [10 AS id#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_fn_b`.`frozen_t`, ErrorIfExists, [id]
+   +- Project [20 AS id#x]
+      +- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_a, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_a)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+CREATE FUNCTION default.frozen_fn()
+RETURNS INT
+RETURN (SELECT MAX(id) FROM frozen_t)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.frozen_fn, INT, (SELECT MAX(id) FROM frozen_t), false, false, false, false
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_b, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_b)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path
+-- !query analysis
+Aggregate [max(id#x) AS max(id)#x]
++- SubqueryAlias bare_lookup_uses_live_path
+   +- SubqueryAlias spark_catalog.sql_path_fn_b.frozen_t
+      +- Relation spark_catalog.sql_path_fn_b.frozen_t[id#x] parquet
+
+
+-- !query
+SELECT default.frozen_fn() AS scalar_body_uses_frozen_path
+-- !query analysis
+Project [spark_catalog.default.frozen_fn() AS scalar_body_uses_frozen_path#x]
+:  +- Aggregate [max(id#x) AS max(id)#x]
+:     +- SubqueryAlias spark_catalog.sql_path_fn_a.frozen_t
+:        +- Relation spark_catalog.sql_path_fn_a.frozen_t[id#x] parquet
++- Project
+   +- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_a, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_a)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+CREATE FUNCTION default.frozen_tvf()
+RETURNS TABLE(id INT)
+RETURN SELECT MAX(id) AS id FROM frozen_t
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.frozen_tvf, id INT, SELECT MAX(id) AS id FROM frozen_t, true, false, false, false
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_b, system.builtin
+-- !query analysis
+SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_b)), SchemaInPath(List(system, builtin))]
+
+
+-- !query
+SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path
+-- !query analysis
+Project [id#x]
++- SubqueryAlias table_body_uses_frozen_path
+   +- SQLFunctionNode spark_catalog.default.frozen_tvf
+      +- SubqueryAlias frozen_tvf
+         +- Project [cast(id#x as int) AS id#x]
+            +- Aggregate [max(id#x) AS id#x]
+               +- SubqueryAlias spark_catalog.sql_path_fn_a.frozen_t
+                  +- Relation spark_catalog.sql_path_fn_a.frozen_t[id#x] parquet
+
+
+-- !query
+USE spark_catalog.sql_path_fn_a
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a]
+
+
+-- !query
+CREATE FUNCTION sql_path_fn_a.f_ctx()
+RETURNS STRING
+RETURN concat(current_schema(), '::', current_path())
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.sql_path_fn_a.f_ctx, STRING, concat(current_schema(), '::', current_path()), false, false, false, false
+
+
+-- !query
+USE spark_catalog.sql_path_fn_b
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b]
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SELECT sql_path_fn_a.f_ctx() AS invoker_context
+-- !query analysis
+Project [spark_catalog.sql_path_fn_a.f_ctx() AS invoker_context#x]
++- Project
+   +- OneRowRelation
+
+
+-- !query
+USE spark_catalog.default
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default]
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+DROP FUNCTION default.frozen_fn
+-- !query analysis
+DropFunctionCommand spark_catalog.default.frozen_fn, false, false
+
+
+-- !query
+DROP FUNCTION default.frozen_tvf
+-- !query analysis
+DropFunctionCommand spark_catalog.default.frozen_tvf, false, false
+
+
+-- !query
+DROP FUNCTION sql_path_fn_a.f_ctx
+-- !query analysis
+DropFunctionCommand spark_catalog.sql_path_fn_a.f_ctx, false, false
+
+
+-- !query
+DROP TABLE sql_path_fn_a.frozen_t
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_fn_a.frozen_t
+
+
+-- !query
+DROP TABLE sql_path_fn_b.frozen_t
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_fn_b.frozen_t
+
+
+-- !query
+DROP SCHEMA sql_path_fn_a
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a]
+
+
+-- !query
+DROP SCHEMA sql_path_fn_b
+-- !query analysis
+DropNamespace false, false
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b]
+
+
+-- !query
+SET spark.sql.defaultPath = system.session, system.builtin
+-- !query analysis
+SetCommand (spark.sql.defaultPath,Some(system.session, system.builtin))
+
+
+-- !query
+SET PATH = system.builtin, system.session
+-- !query analysis
+SetPathCommand [SchemaInPath(List(system, builtin)), SchemaInPath(List(system, session))]
+
+
+-- !query
+SELECT current_path() AS explicit_set_path_wins_over_conf
+-- !query analysis
+Project [current_path() AS explicit_set_path_wins_over_conf#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+RESET spark.sql.defaultPath
+-- !query analysis
+ResetCommand spark.sql.defaultPath
+
+
+-- !query
+SET spark.sql.defaultPath = system.session, system.builtin, current_schema
+-- !query analysis
+SetCommand (spark.sql.defaultPath,Some(system.session, system.builtin, current_schema))
+
+
+-- !query
+USE spark_catalog.default
+-- !query analysis
+SetCatalogAndNamespace
++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default]
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SELECT current_path() AS default_path_expands_to_conf
+-- !query analysis
+Project [current_path() AS default_path_expands_to_conf#x]
++- OneRowRelation
+
+
+-- !query
+RESET spark.sql.defaultPath
+-- !query analysis
+ResetCommand spark.sql.defaultPath
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query analysis
+SetPathCommand [DefaultPath]
+
+
+-- !query
+SET spark.sql.defaultPath = this is not a path
+-- !query analysis
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT",
+  "sqlState" : "22022",
+  "messageParameters" : {
+    "confName" : "spark.sql.defaultPath",
+    "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).",
+    "confValue" : "this is not a path"
+  }
+}
+
+
+-- !query
+SET spark.sql.defaultPath = PATH, system.builtin
+-- !query analysis
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT",
+  "sqlState" : "22022",
+  "messageParameters" : {
+    "confName" : "spark.sql.defaultPath",
+    "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).",
+    "confValue" : "PATH, system.builtin"
+  }
+}
+
+
+-- !query
+SET spark.sql.path.enabled = false
+-- !query analysis
+SetCommand (spark.sql.path.enabled,Some(false))
+
+
+-- !query
+SELECT current_path() IS NOT NULL AS has_path
+-- !query analysis
+Project [isnotnull(current_path()) AS has_path#x]
++- OneRowRelation
+
+
+-- !query
+SET PATH = spark_catalog.default
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED",
+  "sqlState" : "0A000",
+  "messageParameters" : {
+    "config" : "spark.sql.path.enabled"
+  }
+}
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
index fdb7c8adf2826..4b6fc5d450140 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
@@ -504,9 +504,58 @@ Project [scalar-subquery#x [title#x] AS scalarsubquery(title)#xL]
 
 
 -- !query
-SET VARIABLE title = 'Test qualifiers - fail'
+SET VARIABLE title = 'Dropped struct variable -- field access vs qualified name'
 -- !query analysis
 SetVariable [variablereference(system.session.title='Test variable in aggregate')]
++- Project [Dropped struct variable -- field access vs qualified name AS title#x]
+   +- OneRowRelation
+
+
+-- !query
+DECLARE OR REPLACE VARIABLE session STRUCT<a INT> = NAMED_STRUCT('a', 1)
+-- !query analysis
+CreateVariable default(cast(named_struct(a, 1) as struct<a:int>), sql='NAMED_STRUCT('a', 1)'), true
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.session
+
+
+-- !query
+SELECT session.a
+-- !query analysis
+Project [variablereference(system.session.session=NAMED_STRUCT('a', 1)).a AS a#x]
++- OneRowRelation
+
+
+-- !query
+DROP TEMPORARY VARIABLE session
+-- !query analysis
+DropVariable false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.session
+
+
+-- !query
+SELECT session.a
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`session`.`a`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 16,
+    "fragment" : "session.a"
+  } ]
+}
+
+
+-- !query
+SET VARIABLE title = 'Test qualifiers - fail'
+-- !query analysis
+SetVariable [variablereference(system.session.title='Dropped struct variable -- field access vs qualified name')]
 +- Project [Test qualifiers - fail AS title#x]
    +- OneRowRelation
 
@@ -519,9 +568,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`system`.`session`",
+    "searchPath" : "[`system`.`session`]",
     "variableName" : "`builtin`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 29,
+    "stopIndex" : 40,
+    "fragment" : "builtin.var1"
+  } ]
 }
 
 
@@ -533,9 +589,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`system`.`session`",
+    "searchPath" : "[`system`.`session`]",
     "variableName" : "`system`.`sesion`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 29,
+    "stopIndex" : 46,
+    "fragment" : "system.sesion.var1"
+  } ]
 }
 
 
@@ -547,9 +610,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`system`.`session`",
+    "searchPath" : "[`system`.`session`]",
     "variableName" : "`sys`.`session`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 29,
+    "stopIndex" : 44,
+    "fragment" : "sys.session.var1"
+  } ]
 }
 
 
@@ -648,9 +718,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`SYSTEM`.`SESSION`",
+    "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]",
     "variableName" : "`ses`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 14,
+    "stopIndex" : 21,
+    "fragment" : "ses.var1"
+  } ]
 }
 
 
@@ -662,9 +739,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`SYSTEM`.`SESSION`",
+    "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]",
     "variableName" : "`builtn`.`session`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 14,
+    "stopIndex" : 32,
+    "fragment" : "builtn.session.var1"
+  } ]
 }
 
 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out
new file mode 100644
index 0000000000000..04fb7715961d0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence-legacy.sql.out
@@ -0,0 +1,293 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x
+-- !query analysis
+CreateViewCommand `v1`, SELECT 1 AS x, false, true, LocalTempView, UNSUPPORTED, true
+   +- Project [1 AS x#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x
+-- !query analysis
+CreateSQLFunctionCommand identity_fn, x INT, INT, x, false, true, false, true
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query analysis
+Project [identity_fn(x#x) AS identity_fn(42)#x]
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1)
+-- !query analysis
+CreateSQLFunctionCommand col_vs_param, x INT, INT, (SELECT x FROM v1), false, true, false, true
+
+
+-- !query
+SELECT col_vs_param(42)
+-- !query analysis
+Project [col_vs_param(x#x) AS col_vs_param(42)#x]
+:  +- Project [x#x]
+:     +- SubqueryAlias v1
+:        +- View (`v1`, [x#x])
+:           +- Project [cast(x#x as int) AS x#x]
+:              +- Project [1 AS x#x]
+:                 +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING)
+RETURNS STRING RETURN current_user
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param, current_user STRING, STRING, current_user, false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT)
+RETURNS STRING RETURN typeof(current_date)
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param_date, current_date INT, STRING, typeof(current_date), false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param_date(42)
+-- !query analysis
+Project [paramless_vs_param_date(current_date#x) AS paramless_vs_param_date(42)#x]
++- Project [cast(42 as int) AS current_date#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT)
+RETURNS STRING RETURN typeof(current_time)
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param_time, current_time INT, STRING, typeof(current_time), false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param_time(42)
+-- !query analysis
+Project [paramless_vs_param_time(current_time#x) AS paramless_vs_param_time(42)#x]
++- Project [cast(42 as int) AS current_time#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT)
+RETURNS INT RETURN grouping__id
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param_grouping, grouping__id INT, INT, grouping__id, false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param_grouping(42)
+-- !query analysis
+Project [paramless_vs_param_grouping(grouping__id#x) AS paramless_vs_param_grouping(42)#x]
++- Project [cast(42 as int) AS grouping__id#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT)
+RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y))
+-- !query analysis
+CreateSQLFunctionCommand lca_vs_param, x INT, INT, (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)), false, true, false, true
+
+
+-- !query
+SELECT lca_vs_param(42)
+-- !query analysis
+Project [lca_vs_param(x#x) AS lca_vs_param(42)#x]
+:  +- Project [y#x]
+:     +- SubqueryAlias __auto_generated_subquery_name
+:        +- Project [x#x, (x#x + 1) AS y#x]
+:           +- Project [999 AS x#x]
+:              +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT)
+RETURNS INT RETURN (SELECT (SELECT x) FROM v1)
+-- !query analysis
+CreateSQLFunctionCommand outer_vs_param, x INT, INT, (SELECT (SELECT x) FROM v1), false, true, false, true
+
+
+-- !query
+SELECT outer_vs_param(42)
+-- !query analysis
+Project [outer_vs_param(x#x) AS outer_vs_param(42)#x]
+:  +- Project [scalar-subquery#x [x#x] AS scalarsubquery(x)#x]
+:     :  +- Project [outer(x#x)]
+:     :     +- OneRowRelation
+:     +- SubqueryAlias v1
+:        +- View (`v1`, [x#x])
+:           +- Project [cast(x#x as int) AS x#x]
+:              +- Project [1 AS x#x]
+:                 +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT)
+RETURNS INT RETURN (SELECT (SELECT x))
+-- !query analysis
+CreateSQLFunctionCommand outer_param_pure, x INT, INT, (SELECT (SELECT x)), false, true, false, true
+
+
+-- !query
+SELECT outer_param_pure(42)
+-- !query analysis
+Project [outer_param_pure(x#x) AS outer_param_pure(42)#x]
+:  +- Project [outer(x#x)]
+:     +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+DECLARE x = 999
+-- !query analysis
+CreateVariable default(999, sql='999'), false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query analysis
+Project [identity_fn(x#x) AS identity_fn(42)#x]
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x
+-- !query analysis
+CreateSQLFunctionCommand inner_fn, y INT, INT, x, false, true, false, true
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x)
+-- !query analysis
+CreateSQLFunctionCommand outer_fn, x INT, INT, inner_fn(x), false, true, false, true
+
+
+-- !query
+SELECT outer_fn(42)
+-- !query analysis
+Project [outer_fn(x#x) AS outer_fn(42)#x]
++- Project [x#x, cast(x#x as int) AS y#x]
+   +- Project [cast(42 as int) AS x#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING)
+RETURNS TABLE(c STRING) RETURN SELECT current_user AS c
+-- !query analysis
+CreateSQLFunctionCommand tvf_paramless_vs_param, current_user STRING, c STRING, SELECT current_user AS c, true, true, false, true
+
+
+-- !query
+SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+DROP TEMPORARY VARIABLE x
+-- !query analysis
+DropVariable false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x
+
+
+-- !query
+DROP VIEW v1
+-- !query analysis
+DropTempViewCommand v1, false
+
+
+-- !query
+DROP TEMPORARY FUNCTION identity_fn
+-- !query analysis
+DropFunctionCommand identity_fn, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION col_vs_param
+-- !query analysis
+DropFunctionCommand col_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param
+-- !query analysis
+DropFunctionCommand paramless_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_date
+-- !query analysis
+DropFunctionCommand paramless_vs_param_date, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_time
+-- !query analysis
+DropFunctionCommand paramless_vs_param_time, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_grouping
+-- !query analysis
+DropFunctionCommand paramless_vs_param_grouping, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION lca_vs_param
+-- !query analysis
+DropFunctionCommand lca_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_vs_param
+-- !query analysis
+DropFunctionCommand outer_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_param_pure
+-- !query analysis
+DropFunctionCommand outer_param_pure, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION inner_fn
+-- !query analysis
+DropFunctionCommand inner_fn, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_fn
+-- !query analysis
+DropFunctionCommand outer_fn, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION tvf_paramless_vs_param
+-- !query analysis
+DropFunctionCommand tvf_paramless_vs_param, false, true
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out
new file mode 100644
index 0000000000000..03e2b3c6d8cb3
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf-name-precedence.sql.out
@@ -0,0 +1,320 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x
+-- !query analysis
+CreateViewCommand `v1`, SELECT 1 AS x, false, true, LocalTempView, UNSUPPORTED, true
+   +- Project [1 AS x#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x
+-- !query analysis
+CreateSQLFunctionCommand identity_fn, x INT, INT, x, false, true, false, true
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query analysis
+Project [identity_fn(x#x) AS identity_fn(42)#x]
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1)
+-- !query analysis
+CreateSQLFunctionCommand col_vs_param, x INT, INT, (SELECT x FROM v1), false, true, false, true
+
+
+-- !query
+SELECT col_vs_param(42)
+-- !query analysis
+Project [col_vs_param(x#x) AS col_vs_param(42)#x]
+:  +- Project [x#x]
+:     +- SubqueryAlias v1
+:        +- View (`v1`, [x#x])
+:           +- Project [cast(x#x as int) AS x#x]
+:              +- Project [1 AS x#x]
+:                 +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING)
+RETURNS STRING RETURN current_user
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param, current_user STRING, STRING, current_user, false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT)
+RETURNS STRING RETURN typeof(current_date)
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param_date, current_date INT, STRING, typeof(current_date), false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param_date(42)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT)
+RETURNS STRING RETURN typeof(current_time)
+-- !query analysis
+CreateSQLFunctionCommand paramless_vs_param_time, current_time INT, STRING, typeof(current_time), false, true, false, true
+
+
+-- !query
+SELECT paramless_vs_param_time(42)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT)
+RETURNS INT RETURN grouping__id
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNSUPPORTED_GROUPING_EXPRESSION",
+  "sqlState" : "42K0E",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 12,
+    "fragment" : "grouping__id"
+  } ]
+}
+
+
+-- !query
+SELECT paramless_vs_param_grouping(42)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_ROUTINE",
+  "sqlState" : "42883",
+  "messageParameters" : {
+    "routineName" : "`paramless_vs_param_grouping`",
+    "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 38,
+    "fragment" : "paramless_vs_param_grouping(42)"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT)
+RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y))
+-- !query analysis
+CreateSQLFunctionCommand lca_vs_param, x INT, INT, (SELECT y FROM (SELECT 999 AS x, x + 1 AS y)), false, true, false, true
+
+
+-- !query
+SELECT lca_vs_param(42)
+-- !query analysis
+Project [lca_vs_param(x#x) AS lca_vs_param(42)#x]
+:  +- Project [y#x]
+:     +- SubqueryAlias __auto_generated_subquery_name
+:        +- Project [x#x, (x#x + 1) AS y#x]
+:           +- Project [999 AS x#x]
+:              +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT)
+RETURNS INT RETURN (SELECT (SELECT x) FROM v1)
+-- !query analysis
+CreateSQLFunctionCommand outer_vs_param, x INT, INT, (SELECT (SELECT x) FROM v1), false, true, false, true
+
+
+-- !query
+SELECT outer_vs_param(42)
+-- !query analysis
+Project [outer_vs_param(x#x) AS outer_vs_param(42)#x]
+:  +- Project [scalar-subquery#x [x#x] AS scalarsubquery(x)#x]
+:     :  +- Project [outer(x#x)]
+:     :     +- OneRowRelation
+:     +- SubqueryAlias v1
+:        +- View (`v1`, [x#x])
+:           +- Project [cast(x#x as int) AS x#x]
+:              +- Project [1 AS x#x]
+:                 +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT)
+RETURNS INT RETURN (SELECT (SELECT x))
+-- !query analysis
+CreateSQLFunctionCommand outer_param_pure, x INT, INT, (SELECT (SELECT x)), false, true, false, true
+
+
+-- !query
+SELECT outer_param_pure(42)
+-- !query analysis
+Project [outer_param_pure(x#x) AS outer_param_pure(42)#x]
+:  +- Project [outer(x#x)]
+:     +- OneRowRelation
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+DECLARE x = 999
+-- !query analysis
+CreateVariable default(999, sql='999'), false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query analysis
+Project [identity_fn(x#x) AS identity_fn(42)#x]
++- Project [cast(42 as int) AS x#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x
+-- !query analysis
+CreateSQLFunctionCommand inner_fn, y INT, INT, x, false, true, false, true
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x)
+-- !query analysis
+CreateSQLFunctionCommand outer_fn, x INT, INT, inner_fn(x), false, true, false, true
+
+
+-- !query
+SELECT outer_fn(42)
+-- !query analysis
+Project [outer_fn(x#x) AS outer_fn(42)#x]
++- Project [x#x, cast(x#x as int) AS y#x]
+   +- Project [cast(42 as int) AS x#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING)
+RETURNS TABLE(c STRING) RETURN SELECT current_user AS c
+-- !query analysis
+CreateSQLFunctionCommand tvf_paramless_vs_param, current_user STRING, c STRING, SELECT current_user AS c, true, true, false, true
+
+
+-- !query
+SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+DROP TEMPORARY VARIABLE x
+-- !query analysis
+DropVariable false
++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.x
+
+
+-- !query
+DROP VIEW v1
+-- !query analysis
+DropTempViewCommand v1, false
+
+
+-- !query
+DROP TEMPORARY FUNCTION identity_fn
+-- !query analysis
+DropFunctionCommand identity_fn, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION col_vs_param
+-- !query analysis
+DropFunctionCommand col_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param
+-- !query analysis
+DropFunctionCommand paramless_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_date
+-- !query analysis
+DropFunctionCommand paramless_vs_param_date, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_time
+-- !query analysis
+DropFunctionCommand paramless_vs_param_time, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_grouping
+-- !query analysis
+org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException
+{
+  "errorClass" : "ROUTINE_NOT_FOUND",
+  "sqlState" : "42883",
+  "messageParameters" : {
+    "routineName" : "`paramless_vs_param_grouping`"
+  }
+}
+
+
+-- !query
+DROP TEMPORARY FUNCTION lca_vs_param
+-- !query analysis
+DropFunctionCommand lca_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_vs_param
+-- !query analysis
+DropFunctionCommand outer_vs_param, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_param_pure
+-- !query analysis
+DropFunctionCommand outer_param_pure, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION inner_fn
+-- !query analysis
+DropFunctionCommand inner_fn, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_fn
+-- !query analysis
+DropFunctionCommand outer_fn, false, true
+
+
+-- !query
+DROP TEMPORARY FUNCTION tvf_paramless_vs_param
+-- !query analysis
+DropFunctionCommand tvf_paramless_vs_param, false, true
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index 34969f43e1f73..dd30adde9843a 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -224,7 +224,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 -- !query
 SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOGRAPHY(ANY)))) AS result
 -- !query analysis
-Project [hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as geography(any)))) AS result#x]
+Project [hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as geography(any)), NDR)) AS result#x]
 +- OneRowRelation
 
 
@@ -253,7 +253,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 -- !query
 SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(4326)))) AS result
 -- !query analysis
-Project [hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as geometry(4326)))) AS result#x]
+Project [hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as geometry(4326)), NDR)) AS result#x]
 +- OneRowRelation
 
 
@@ -282,7 +282,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 -- !query
 SELECT hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(ANY)))) AS result
 -- !query analysis
-Project [hex(st_asbinary(cast(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0) as geometry(any)))) AS result#x]
+Project [hex(st_asbinary(cast(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0) as geometry(any)), NDR)) AS result#x]
 +- OneRowRelation
 
 
@@ -311,7 +311,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 -- !query
 SELECT hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040', 4326) AS GEOGRAPHY(4326)))) AS result
 -- !query analysis
-Project [hex(st_asbinary(cast(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 4326) as geography(4326)))) AS result#x]
+Project [hex(st_asbinary(cast(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 4326) as geography(4326)), NDR)) AS result#x]
 +- OneRowRelation
 
 
@@ -491,28 +491,28 @@ Project [typeof(if (isnotnull(wkb#x)) cast(st_geomfromwkb(wkb#x, 0) as geometry(
 -- !query
 SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
 -- !query analysis
-Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x]
+Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), NDR)) AS result#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
 -- !query analysis
-Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0))) AS result#x]
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0), NDR)) AS result#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT ST_AsBinary(ST_GeogFromWKB(NULL))
 -- !query analysis
-Project [st_asbinary(st_geogfromwkb(cast(null as binary))) AS st_asbinary(st_geogfromwkb(NULL))#x]
+Project [st_asbinary(st_geogfromwkb(cast(null as binary)), NDR) AS st_asbinary(st_geogfromwkb(NULL), NDR)#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')))
 -- !query analysis
-Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))) AS hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')))#x]
+Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), NDR)) AS hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), NDR))#x]
 +- OneRowRelation
 
 
@@ -536,7 +536,7 @@ Aggregate [count(1) AS count(1)#xL]
 SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb)) <> wkb
 -- !query analysis
 Aggregate [count(1) AS count(1)#xL]
-+- Filter NOT (st_asbinary(st_geogfromwkb(wkb#x)) = wkb#x)
++- Filter NOT (st_asbinary(st_geogfromwkb(wkb#x), NDR) = wkb#x)
    +- SubqueryAlias spark_catalog.default.geodata
       +- Relation spark_catalog.default.geodata[wkb#x] parquet
 
@@ -544,35 +544,35 @@ Aggregate [count(1) AS count(1)#xL]
 -- !query
 SELECT ST_AsBinary(ST_GeomFromWKB(NULL))
 -- !query analysis
-Project [st_asbinary(st_geomfromwkb(cast(null as binary), 0)) AS st_asbinary(st_geomfromwkb(NULL, 0))#x]
+Project [st_asbinary(st_geomfromwkb(cast(null as binary), 0), NDR) AS st_asbinary(st_geomfromwkb(NULL, 0), NDR)#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040')))
 -- !query analysis
-Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0))) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0)))#x]
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0), NDR)) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR))#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 0)))
 -- !query analysis
-Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0))) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0)))#x]
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0), NDR)) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR))#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 3857)))
 -- !query analysis
-Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 3857))) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 3857)))#x]
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 3857), NDR)) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 3857), NDR))#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 4326)))
 -- !query analysis
-Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 4326))) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 4326)))#x]
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 4326), NDR)) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 4326), NDR))#x]
 +- OneRowRelation
 
 
@@ -638,7 +638,7 @@ Aggregate [count(1) AS count(1)#xL]
 SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb)) <> wkb
 -- !query analysis
 Aggregate [count(1) AS count(1)#xL]
-+- Filter NOT (st_asbinary(st_geomfromwkb(wkb#x, 0)) = wkb#x)
++- Filter NOT (st_asbinary(st_geomfromwkb(wkb#x, 0), NDR) = wkb#x)
    +- SubqueryAlias spark_catalog.default.geodata
       +- Relation spark_catalog.default.geodata[wkb#x] parquet
 
@@ -647,7 +647,7 @@ Aggregate [count(1) AS count(1)#xL]
 SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb, 4326)) <> wkb
 -- !query analysis
 Aggregate [count(1) AS count(1)#xL]
-+- Filter NOT (st_asbinary(st_geomfromwkb(wkb#x, 4326)) = wkb#x)
++- Filter NOT (st_asbinary(st_geomfromwkb(wkb#x, 4326), NDR) = wkb#x)
    +- SubqueryAlias spark_catalog.default.geodata
       +- Relation spark_catalog.default.geodata[wkb#x] parquet
 
@@ -665,6 +665,157 @@ org.apache.spark.SparkIllegalArgumentException
 }
 
 
+-- !query
+SELECT ST_AsBinary(NULL)
+-- !query analysis
+Project [st_asbinary(cast(null as geography(any)), NDR) AS st_asbinary(NULL, NDR)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')))
+-- !query analysis
+Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), NDR)) AS hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), NDR))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'00000000013FF00000000000004000000000000000'), 'NDR'))
+-- !query analysis
+Project [hex(st_asbinary(st_geogfromwkb(0x00000000013FF00000000000004000000000000000), NDR)) AS hex(st_asbinary(st_geogfromwkb(X'00000000013FF00000000000004000000000000000'), NDR))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'))
+-- !query analysis
+Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), XDR)) AS hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), XDR))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040')))
+-- !query analysis
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0), NDR)) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'00000000013FF00000000000004000000000000000'), 'NDR'))
+-- !query analysis
+Project [hex(st_asbinary(st_geomfromwkb(0x00000000013FF00000000000004000000000000000, 0), NDR)) AS hex(st_asbinary(st_geomfromwkb(X'00000000013FF00000000000004000000000000000', 0), NDR))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'))
+-- !query analysis
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0), XDR)) AS hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), XDR))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), '')
+-- !query analysis
+Project [st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), ) AS st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), )#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'ABC')
+-- !query analysis
+Project [st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), ABC) AS st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), ABC)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'big-endian')
+-- !query analysis
+Project [st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040), big-endian) AS st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), big-endian)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb)) <> wkb
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter NOT (st_asbinary(st_geogfromwkb(wkb#x), NDR) = wkb#x)
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'NDR') <> wkb
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter NOT (st_asbinary(st_geogfromwkb(wkb#x), NDR) = wkb#x)
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'XDR') = wkb
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter (st_asbinary(st_geogfromwkb(wkb#x), XDR) = wkb#x)
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb)) <> wkb
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter NOT (st_asbinary(st_geomfromwkb(wkb#x, 0), NDR) = wkb#x)
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb), 'NDR') <> wkb
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter NOT (st_asbinary(st_geomfromwkb(wkb#x, 0), NDR) = wkb#x)
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb), 'XDR') = wkb
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter (st_asbinary(st_geomfromwkb(wkb#x, 0), XDR) = wkb#x)
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), '') IS NOT NULL
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter isnotnull(st_asbinary(st_geogfromwkb(wkb#x), ))
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'ABC') IS NOT NULL
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter isnotnull(st_asbinary(st_geogfromwkb(wkb#x), ABC))
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'big-endian') IS NOT NULL
+-- !query analysis
+Aggregate [count(1) AS count(1)#xL]
++- Filter isnotnull(st_asbinary(st_geogfromwkb(wkb#x), big-endian))
+   +- SubqueryAlias spark_catalog.default.geodata
+      +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
 -- !query
 SELECT ST_Srid(NULL)
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 44512caf1def0..9ca0517a60e8a 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -1075,3 +1075,73 @@ Project [c1#x, c2#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL]
    +- View (`t1`, [c1#x, c2#x])
       +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x]
          +- LocalRelation [col1#x, col2#x]
+
+
+-- !query
+SELECT (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [col1#x] AS scalarsubquery(col1)#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [outer(col1#x) AS col1#x]
+:           +- SubqueryAlias t2
+:              +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT (SELECT t1.s.* FROM VALUES(2) AS t2(col1) LIMIT 1)
+FROM (SELECT named_struct('a', 1) AS s) AS t1
+-- !query analysis
+Project [scalar-subquery#x [s#x] AS scalarsubquery(s)#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [outer(s#x).a AS a#x]
+:           +- SubqueryAlias t2
+:              +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- Project [named_struct(a, 1) AS s#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT (SELECT * FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [] AS scalarsubquery()#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [col1#x]
+:           +- SubqueryAlias t2
+:              +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT (SELECT t1.* FROM (SELECT 3 AS col1) AS t1 LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [] AS scalarsubquery()#x]
+:  +- GlobalLimit 1
+:     +- LocalLimit 1
+:        +- Project [col1#x]
+:           +- SubqueryAlias t1
+:              +- Project [3 AS col1#x]
+:                 +- OneRowRelation
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT (SELECT * FROM (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1)) FROM VALUES(1) AS t1(col1)
+-- !query analysis
+Project [scalar-subquery#x [col1#x] AS scalarsubquery(col1)#x]
+:  +- Project [col1#x]
+:     +- SubqueryAlias __auto_generated_subquery_name
+:        +- GlobalLimit 1
+:           +- LocalLimit 1
+:              +- Project [outer(col1#x) AS col1#x]
+:                 +- SubqueryAlias t2
+:                    +- LocalRelation [col1#x]
++- SubqueryAlias t1
+   +- LocalRelation [col1#x]
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/time-bucket.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/time-bucket.sql.out
new file mode 100644
index 0000000000000..31eccd02d8242
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/time-bucket.sql.out
@@ -0,0 +1,931 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SET TIME ZONE 'UTC'
+-- !query analysis
+SetCommand (spark.sql.session.timeZone,Some(UTC))
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '00' SECOND",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '00' SECOND, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 72,
+    "fragment" : "time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '0' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 71,
+    "fragment" : "time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '0' YEAR",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 70,
+    "fragment" : "time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-15' MINUTE",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 74,
+    "fragment" : "time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-1' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 72,
+    "fragment" : "time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-1' YEAR",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 71,
+    "fragment" : "time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '00' MINUTE",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '15' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 96,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-10' MINUTE",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '05' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 95,
+    "fragment" : "time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '0' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '3' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 92,
+    "fragment" : "time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-2' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '1' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 92,
+    "fragment" : "time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"TIMESTAMP_NTZ '2024-01-01 00:00:00'\"",
+    "inputType" : "\"TIMESTAMP_NTZ\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 107,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"TIMESTAMP '2024-01-01 00:00:00'\"",
+    "inputType" : "\"TIMESTAMP\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"TIMESTAMP_NTZ\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 107,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"15 minutes\"",
+    "inputType" : "\"STRING\"",
+    "paramIndex" : "first",
+    "requiredType" : "(\"INTERVAL DAY TO SECOND\" or \"INTERVAL YEAR TO MONTH\")",
+    "sqlExpr" : "\"time_bucket(15 minutes, TIMESTAMP '2024-01-15 10:23:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 65,
+    "fragment" : "time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"DATE '2024-01-15'\"",
+    "inputType" : "\"DATE\"",
+    "paramIndex" : "second",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 59,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"2024-01-15 10:23:00\"",
+    "inputType" : "\"STRING\"",
+    "paramIndex" : "second",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, 2024-01-15 10:23:00, TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 63,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"DATE '2024-01-01'\"",
+    "inputType" : "\"DATE\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 92,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"2024-01-01 00:00:00\"",
+    "inputType" : "\"STRING\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', 2024-01-01 00:00:00)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 96,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' HOUR) tab(bs)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"bs\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL HOUR\"",
+    "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 55,
+    "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' MONTH) tab(bs)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"bs\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL MONTH\"",
+    "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 55,
+    "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"scalarsubquery()\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL HOUR\"",
+    "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 79,
+    "fragment" : "time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"scalarsubquery()\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL MONTH\"",
+    "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 80,
+    "fragment" : "time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"o\"",
+    "inputName" : "`origin`",
+    "inputType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 73,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"o\"",
+    "inputName" : "`origin`",
+    "inputType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 74,
+    "fragment" : "time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'))
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"scalarsubquery()\"",
+    "inputName" : "`origin`",
+    "inputType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', scalarsubquery())\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 112,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'))"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "1",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "[2, 3]",
+    "functionName" : "`time_bucket`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 37,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR)"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00')
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "4",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "[2, 3]",
+    "functionName" : "`time_bucket`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 136,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, NULL)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00')
+-- !query analysis
+Project [time_bucket(INTERVAL '01' HOUR, null, 2024-01-01 00:00:00, Some(UTC)) AS time_bucket(INTERVAL '01' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00')
+-- !query analysis
+Project [time_bucket(INTERVAL '15' MINUTE, 2024-01-01 11:27:00, 1970-01-01 00:00:00, Some(UTC)) AS time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP_NTZ '1970-01-01 00:00:00')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00')
+-- !query analysis
+Project [time_bucket(INTERVAL '01' HOUR, 2024-01-15 10:23:00, 2024-01-15 00:30:00, Some(UTC)) AS time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2024-05-15 10:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00')
+-- !query analysis
+Project [time_bucket(INTERVAL '1' MONTH, 2024-03-15 11:27:00, 1970-01-01 00:00:00, Some(UTC)) AS time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00', TIMESTAMP_NTZ '1970-01-01 00:00:00')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 10:00:00', TIMESTAMP '2024-03-15 10:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')
+-- !query analysis
+Project [time_bucket(INTERVAL '3' MONTH, 2024-08-20 14:30:00, 2024-01-01 00:00:00, Some(UTC)) AS time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '1969-12-31 23:30:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '10' MINUTE + INTERVAL '5' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '2' MONTH + INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00')
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '5' MINUTE)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' HOUR, t) AS bucket
+  FROM VALUES (TIMESTAMP '2024-01-15 10:23:00'), (TIMESTAMP '2024-01-15 14:45:00'), (CAST(NULL AS TIMESTAMP)) tab(t)
+  ORDER BY t
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '15' MINUTE, t) AS bucket
+  FROM VALUES (TIMESTAMP_NTZ '2024-01-15 10:23:00'), (TIMESTAMP_NTZ '2024-01-15 14:07:00') tab(t)
+  ORDER BY t
+-- !query analysis
+Sort [t#x ASC NULLS FIRST], true
++- Project [t#x, time_bucket(INTERVAL '15' MINUTE, t#x, 1970-01-01 00:00:00, Some(UTC)) AS bucket#x]
+   +- SubqueryAlias tab
+      +- LocalRelation [t#x]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket
+  FROM VALUES (TIMESTAMP '2024-03-15 10:23:00'), (TIMESTAMP '2024-06-01 00:00:00') tab(t)
+  ORDER BY t
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SET TIME ZONE 'America/Los_Angeles'
+-- !query analysis
+SetCommand (spark.sql.session.timeZone,Some(America/Los_Angeles))
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-02-15 10:00:00'),
+    (TIMESTAMP '2024-03-15 10:00:00'),
+    (TIMESTAMP '2024-04-15 10:00:00') tab(t)
+  ORDER BY t
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' MONTH, CAST(t AS TIMESTAMP_NTZ)) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-02-15 10:00:00'),
+    (TIMESTAMP '2024-03-15 10:00:00'),
+    (TIMESTAMP '2024-04-15 10:00:00') tab(t)
+  ORDER BY t
+-- !query analysis
+Sort [t#x ASC NULLS FIRST], true
++- Project [t#x, time_bucket(INTERVAL '1' MONTH, cast(t#x as timestamp_ntz), 1970-01-01 00:00:00, Some(America/Los_Angeles)) AS bucket#x]
+   +- SubqueryAlias tab
+      +- LocalRelation [t#x]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' DAY, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-03-09 12:00:00'),
+    (TIMESTAMP '2024-03-10 12:00:00'),
+    (TIMESTAMP '2024-03-11 12:00:00') tab(t)
+  ORDER BY t
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' DAY, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-11-02 12:00:00'),
+    (TIMESTAMP '2024-11-03 12:00:00'),
+    (TIMESTAMP '2024-11-04 12:00:00') tab(t)
+  ORDER BY t
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '36' HOUR, t, TIMESTAMP '2024-11-01 00:00:00') AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-11-05 11:30:00') tab(t)
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out
index 977b1e1459c3e..11c24c8cc3405 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out
@@ -359,6 +359,39 @@ Project [length(cast(cast(1996-09-10 10:11:12.4 as timestamp) as string)) AS len
             +- OneRowRelation
 
 
+-- !query
+SELECT '12:00:00' = TIME'12:00:00' FROM t
+-- !query analysis
+Project [(cast(12:00:00 as time(6)) = 12:00:00) AS (12:00:00 = TIME '12:00:00')#x]
++- SubqueryAlias t
+   +- View (`t`, [1#x])
+      +- Project [cast(1#x as int) AS 1#x]
+         +- Project [1 AS 1#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT '12:00:01' > TIME'12:00:00' FROM t
+-- !query analysis
+Project [(cast(12:00:01 as time(6)) > 12:00:00) AS (12:00:01 > TIME '12:00:00')#x]
++- SubqueryAlias t
+   +- View (`t`, [1#x])
+      +- Project [cast(1#x as int) AS 1#x]
+         +- Project [1 AS 1#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT time_trunc('HOUR', '12:34:56') FROM t
+-- !query analysis
+Project [time_trunc(HOUR, cast(12:34:56 as time(6))) AS time_trunc(HOUR, 12:34:56)#x]
++- SubqueryAlias t
+   +- View (`t`, [1#x])
+      +- Project [cast(1#x as int) AS 1#x]
+         +- Project [1 AS 1#x]
+            +- OneRowRelation
+
+
 -- !query
 SELECT year( '1996-01-10') FROM t
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out
index 73b266fb02f99..0915e3db57cd0 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out
@@ -111,7 +111,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, SELECT 1, false, true, Persiste
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -197,7 +197,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -235,7 +235,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -294,7 +294,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -344,7 +344,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -397,7 +397,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -443,7 +443,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -489,7 +489,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -551,7 +551,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -600,7 +600,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -637,7 +637,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -674,7 +674,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -707,7 +707,7 @@ SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(true))
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -732,7 +732,7 @@ SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(true))
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out
index e4292334e784c..c91e3d3ee6deb 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out
@@ -37,7 +37,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -73,7 +73,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -114,7 +114,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -151,7 +151,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -198,7 +198,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -211,7 +211,7 @@ AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, BINDING
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -247,7 +247,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out
index bf05d5cd617a8..15210983b7d86 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out
@@ -43,7 +43,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -83,7 +83,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -123,7 +123,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -185,7 +185,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -234,7 +234,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -271,7 +271,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -308,7 +308,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -345,7 +345,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, P
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -395,7 +395,7 @@ AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, COMPENSATION
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out
index 36b2e04df0f8e..c52181dbc360e 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out
@@ -44,7 +44,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -83,7 +83,7 @@ Project [c4#x, c5#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -122,7 +122,7 @@ Project [c4#x, c5#x, c6#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -170,7 +170,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -201,7 +201,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -250,7 +250,7 @@ Project [a1#x, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -290,7 +290,7 @@ Project [a1#x, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -330,7 +330,7 @@ Project [a1#x, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -379,7 +379,7 @@ Project [a1#x, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -416,7 +416,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -453,7 +453,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -482,7 +482,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -498,7 +498,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -530,7 +530,7 @@ Project [a1#xL, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -546,7 +546,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,Some(a1)), (a2,Some(a2))],
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -578,7 +578,7 @@ Project [a1#xL, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -594,7 +594,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, P
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -625,7 +625,7 @@ Project [c1#xL, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -784,7 +784,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out
index f097ae082546e..05480903885f8 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out
@@ -45,7 +45,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -85,7 +85,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -125,7 +125,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -174,7 +174,7 @@ Project [c1#x, c2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -211,7 +211,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -248,7 +248,7 @@ org.apache.spark.sql.AnalysisException
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -277,7 +277,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -309,7 +309,7 @@ Project [a1#xL, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -325,7 +325,7 @@ CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,Some(a1)), (a2,Some(a2))],
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -357,7 +357,7 @@ Project [a1#xL, a2#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -427,7 +427,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
@@ -452,7 +452,7 @@ Project [c1#x]
 DESCRIBE EXTENDED v
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x]
-+- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, `spark_catalog`.`default`.`v`
++- ResolvedPersistentView V2SessionCatalog(spark_catalog), default.v, spark_catalog.default.v
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql b/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql
new file mode 100644
index 0000000000000..19d2154936f9c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/extract-value-nulltype-single-pass.sql
@@ -0,0 +1,10 @@
+-- SPARK-57186: multipart field access (col.a) on a NullType base propagates NULL under the
+-- single-pass resolver as well, consistently with the legacy analyzer. Dual-running both analyzers
+-- locks in that consistency (no HYBRID_ANALYZER_EXCEPTION).
+-- The col[0]/col['key'] subscript forms are intentionally not covered here: the single-pass
+-- resolver does not resolve subscript extraction (UnresolvedExtractValue) at all -- a pre-existing
+-- limitation independent of NullType -- so they are exercised only under the legacy analyzer in
+-- extract-value-resolution-edge-cases.sql.
+--SET spark.sql.analyzer.singlePassResolver.dualRunWithLegacy=true
+
+SELECT col.a FROM (SELECT null AS col) t;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql b/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql
index 5a2784d542702..48ebdfc0a3fab 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/extract-value-resolution-edge-cases.sql
@@ -8,3 +8,13 @@ SELECT col1.a, a FROM t1 ORDER BY col1.a;
 SELECT split(col1, '-')[1] AS a FROM VALUES('a-b') ORDER BY split(col1, '-')[1];
 
 DROP TABLE t1;
+
+-- SPARK-57186: extracting a field/element/key from a NullType base returns NULL instead of
+-- throwing INVALID_EXTRACT_BASE_FIELD_TYPE (SQL NULL propagation; a NullType column can arise e.g.
+-- from schema evolution with missing columns). This applies uniformly to dotted field access
+-- (`col.a`) and the subscript forms (`col[0]`, `col['key']`), and is implemented at the
+-- user-facing resolution sites (ExtractValue.applyOrNull) without changing the shared
+-- ExtractValue.extractValue utility.
+SELECT col.a FROM (SELECT null AS col) t;
+SELECT col[0] FROM (SELECT null AS col) t;
+SELECT col['key'] FROM (SELECT null AS col) t;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql b/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql
index 1f53ca359fe13..de3f5c8cc43f2 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/having-and-order-by-recursive-type-name-resolution.sql
@@ -141,3 +141,10 @@ FROM VALUES (NAMED_STRUCT('nums', ARRAY(10, 20))) t (col1)
 GROUP BY col1
 HAVING col1.nums[0] + col1.nums[1] > 25
 ORDER BY col1.nums[0];
+
+-- SPARK-57186: Alias type: Struct, Table column type: NullType (void).
+-- Unlike the STRING/ARRAY/MAP input bases above, which throw INVALID_EXTRACT_BASE_FIELD_TYPE for
+-- this shadowing pattern, a NullType input column that shadows the struct alias yields NULL
+-- (NULL propagation). The HAVING predicate is therefore NULL and the row is filtered out, giving
+-- an empty result. NullType is intentionally the one base type that does not error here.
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql b/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
index e3cef9207d20f..8a71afb38a76f 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
@@ -552,6 +552,36 @@ left join
      order by t_inner.b1,t_inner.b2 desc limit 1
  ) as lateral_table;
 
+-- lateral join after NATURAL/USING JOIN: outer attribute visibility
+
+-- lateral after NATURAL JOIN: unqualified key resolves to the merged column
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+     nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k);
+
+-- lateral after NATURAL JOIN: qualified keys resolve to original columns
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+     nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k, nj1.k AS qual_nj1k, nj2.k AS qual_nj2k);
+
+-- lateral after USING JOIN: unqualified and qualified keys
+WITH uj1(k, v1) AS (VALUES (1, 'a')),
+     uj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM uj1 JOIN uj2 USING (k),
+LATERAL (SELECT k AS unq_k, uj1.k AS qual_uj1k, uj2.k AS qual_uj2k);
+
+-- lateral cannot see column hidden by a subquery alias
+WITH cte1(k, v1) AS (VALUES (1, 'a'))
+SELECT * FROM (SELECT k FROM cte1 ORDER BY v1) sub,
+LATERAL (SELECT v1 AS leaked);
+
+-- lateral cannot see column not in GROUP BY output
+WITH cte1(k, v1) AS (VALUES (1, 'a'), (2, 'b'), (3, 'c'))
+SELECT * FROM (SELECT k FROM cte1 GROUP BY k) g,
+LATERAL (SELECT v1 AS leaked);
+
 -- clean up
 DROP VIEW t1;
 DROP VIEW t2;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/join-nearest-by.sql b/sql/core/src/test/resources/sql-tests/inputs/join-nearest-by.sql
new file mode 100644
index 0000000000000..40cfa87c4cde0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/join-nearest-by.sql
@@ -0,0 +1,155 @@
+-- Test cases for NEAREST BY top-K ranking join.
+
+CREATE VIEW users(user_id, score) AS VALUES (1, 10.0), (2, 20.0), (3, 30.0);
+CREATE VIEW products(product, pscore) AS VALUES ('A', 11.0), ('B', 22.0), ('C', 5.0);
+
+-- Basic APPROX NEAREST BY SIMILARITY with k = 1
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- SELECT * to validate the output schema. Must surface only the user-visible columns from
+-- left and right (`user_id`, `score`, `product`, `pscore`) -- no rewrite-internal columns
+-- (`__qid`, `__nearest_matches__`, `__ranking__`) and no Generator-aliased names.
+SELECT *
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- Same schema check but for LEFT OUTER. Right-side columns are nullable in this mode (left
+-- rows with no matches surface as NULL); the schema still must not leak internal columns.
+SELECT *
+FROM users u LEFT OUTER JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- APPROX NEAREST BY DISTANCE with k = 2
+SELECT u.user_id, p.product, p.pscore
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore);
+
+-- EXACT NEAREST BY SIMILARITY with default k = 1
+SELECT u.user_id, p.product
+FROM users u INNER JOIN products p
+  EXACT NEAREST BY SIMILARITY -abs(u.score - p.pscore);
+
+-- LEFT OUTER JOIN with NEAREST BY, empty right side
+SELECT u.user_id, p.product
+FROM users u LEFT OUTER JOIN (SELECT * FROM products WHERE false) p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- INNER JOIN with NEAREST BY, empty right side
+SELECT u.user_id, p.product
+FROM users u INNER JOIN (SELECT * FROM products WHERE false) p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- Explicit INNER keyword
+SELECT u.user_id, p.product
+FROM users u INNER JOIN products p
+  APPROX NEAREST 1 BY DISTANCE abs(u.score - p.pscore);
+
+-- Self-join: same relation on both sides. Exercises DeduplicateRelations' NearestByJoin
+-- arm, which rewrites the right side with fresh ExprIds so the join resolves. Each row's
+-- nearest match by `abs(score - score)` is itself, so the output is deterministic.
+SELECT a.user_id AS a_id, b.user_id AS b_id
+FROM users a JOIN users b
+  APPROX NEAREST 1 BY DISTANCE abs(a.score - b.score)
+ORDER BY a.user_id, b.user_id;
+
+-- Error: unsupported join type (RIGHT OUTER)
+SELECT u.user_id, p.product
+FROM users u RIGHT OUTER JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- Error: num_results out of range (0)
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 0 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- Error: num_results out of range (100001)
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 100001 BY SIMILARITY -abs(u.score - p.pscore);
+
+-- Error: non-orderable ranking expression
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY map(u.score, p.pscore);
+
+-- Both EXACT and APPROX permit a nondeterministic ranking expression. Rows differ run to
+-- run, so we only assert the row count: one match per left row when k = 1.
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    EXACT NEAREST 1 BY SIMILARITY rand() + p.pscore
+);
+
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    APPROX NEAREST 1 BY SIMILARITY rand() + p.pscore
+);
+
+-- Same with k = 2 to exercise the multi-match path with rand().
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    APPROX NEAREST 2 BY DISTANCE rand() + p.pscore
+);
+
+-- EXPLAIN of APPROX + nondeterministic ranking. Locks in the plan shape: the rewrite
+-- injects a Project above the Join that materializes `rand(0) + p.pscore` as `__ranking__`,
+-- An explicit seed is used so the EXPLAIN string is byte-stable across runs (without it,
+-- `rand()` synthesizes a fresh random seed each time and the seed appears in the EXPLAIN).
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY rand(0) + p.pscore;
+
+-- EXPLAIN of a query whose left-side predicate (user_id > 1) is pushed down to the left
+-- input of the rewrite's synthetic join. Demonstrates that pushdown rules walk through
+-- the rewrite's Generate -> Aggregate -> Join shape and reach the underlying left input.
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+WHERE u.user_id > 1;
+
+-- EXPLAIN of a query whose right-side predicate (p.product != 'C') cannot push below the
+-- rewrite's Generate(inline) and stays above it. Demonstrates that the optimizer pipeline
+-- runs end-to-end and the rewrite's plan shape (Generate over Aggregate over Join) survives
+-- to physical planning even when a top-level filter cannot be pushed into it.
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+WHERE p.product != 'C';
+
+-- The rewrite produces an unconditioned cross-product internally. When the user has opted
+-- out of cross-products via `spark.sql.crossJoin.enabled = false`, NEAREST BY queries are
+-- rejected by `CheckCartesianProducts` -- the rewrite does not bypass the user's choice.
+SET spark.sql.crossJoin.enabled = false;
+
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore);
+
+SET spark.sql.crossJoin.enabled = true;
+
+-- Tie behavior: when multiple right rows have equal ranking values for a given left row,
+-- MaxMinByK breaks ties arbitrarily (the SPIP marks tie-break as unspecified). We can't
+-- pin specific rows, but the operator must still return exactly `numResults` matches per
+-- left row when enough candidates exist.
+CREATE OR REPLACE TEMP VIEW tied_products(product, pscore)
+  AS VALUES ('A', 10.0), ('B', 10.0), ('C', 10.0);
+
+SELECT u.user_id, COUNT(*) AS num_matches
+FROM users u JOIN tied_products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+GROUP BY u.user_id
+ORDER BY u.user_id;
+
+DROP VIEW tied_products;
+DROP VIEW users;
+DROP VIEW products;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql
new file mode 100644
index 0000000000000..b7f70266cc8f2
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence-legacy.sql
@@ -0,0 +1,3 @@
+--IMPORT parameterless-function-name-precedence.sql
+
+--SET spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction=true
diff --git a/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql
new file mode 100644
index 0000000000000..6d2222cf3308c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/parameterless-function-name-precedence.sql
@@ -0,0 +1,48 @@
+-- Precedence between parameterless built-in functions and other resolution candidates
+-- (column, LCA, outer reference, session variable). The outer-reference cases for
+-- `current_time` are omitted because the value is non-deterministic; other `current_time`
+-- patterns are covered (column-wins below, and the UDF-param case in the companion
+-- sql-udf-name-precedence.sql).
+
+CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user;
+CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time;
+
+-- Column wins over parameterless function.
+SELECT current_user FROM v_user;
+SELECT current_time FROM v_time;
+
+-- Parameterless function wins over LCA. Compared against `current_user()` (with alias) so
+-- the golden stays stable across test envs that return different user names.
+SELECT 'abc' AS current_user, current_user = current_user() AS function_won;
+
+-- Parameterless function wins over outer reference.
+SELECT (SELECT current_user) = current_user() AS function_won FROM v_user;
+
+DECLARE current_user = 'abc';
+
+-- Column wins over both the parameterless function and the session variable.
+SELECT current_user, current_user FROM v_user;
+
+DROP TEMPORARY VARIABLE current_user;
+
+-- Parameterless function wins over outer reference (current_date / current_timestamp).
+-- typeof keeps the golden stable across clock changes.
+WITH t1 AS (SELECT 1 AS current_date)
+SELECT typeof((SELECT current_date)) FROM t1;
+
+WITH t1 AS (SELECT 1 AS current_timestamp)
+SELECT typeof((SELECT current_timestamp)) FROM t1;
+
+-- Parameterless function wins over outer reference (user / session_user).
+WITH t1 AS (SELECT 1 AS user)
+SELECT (SELECT user) = current_user() AS function_won FROM t1;
+
+WITH t1 AS (SELECT 1 AS session_user)
+SELECT (SELECT session_user) = current_user() AS function_won FROM t1;
+
+-- grouping__id: the resolution rule applies but the function itself only makes sense
+-- inside grouping analytics. Assert the rule fires structurally via type/error pattern.
+SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user));
+
+DROP VIEW v_user;
+DROP VIEW v_time;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql
new file mode 100644
index 0000000000000..1137c747320ca
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql
@@ -0,0 +1,421 @@
+-- ============================================================================
+-- SQL Standard PATH golden coverage
+-- ============================================================================
+--
+-- This file is the readable, SQL-level reference for what the PATH feature
+-- does. It is the primary place to look up "how does SET PATH behave when
+-- I write ..." before reaching for the Scala unit suites. Tests that need
+-- features not expressible in pure SQL (multi-threaded execution, session
+-- cloning, view-metadata inspection, Connect/PySpark plumbing) live in the
+-- matching Scala / Python suites.
+--
+-- Table of Contents
+-- -----------------
+--   1. Default path observability (no SET PATH issued)
+--   2. SET PATH grammar
+--      2.1 Literal schema entries; case preservation; backtick quoting
+--      2.2 DEFAULT_PATH shortcut
+--      2.3 SYSTEM_PATH shortcut
+--      2.4 PATH keyword (append to live path)
+--      2.5 current_schema / current_database shortcuts
+--   3. CURRENT_PATH() builtin
+--      3.1 ANSI no-parens form equals current_path()
+--      3.2 Argument-count validation
+--   4. Static error conditions at SET PATH
+--      4.1 Literal duplicate
+--      4.2 DEFAULT_PATH expansion duplicate
+--      4.3 SYSTEM_PATH expansion duplicate
+--      4.4 current_database vs current_schema cross-alias duplicate
+--      4.5 Single-part schema reference rejected
+--   5. Routine resolution via PATH
+--      5.1 Persistent scalar function follows PATH
+--      5.2 Persistent table function follows PATH
+--      5.3 First-match ordering across two schemas on PATH
+--      5.4 Unqualified miss when schema is not on PATH
+--   6. Relation resolution via PATH
+--      6.1 Table resolved via PATH; first-match ordering
+--      6.2 Unqualified miss when schema is not on PATH
+--   7. Persisted view frozen-path behavior
+--      7.1 View body resolves via creation-time PATH (not invoker PATH)
+--      7.2 current_schema / current_path in view body use invoker context
+--   8. SQL function frozen-path behavior
+--      8.1 Scalar function body resolves via creation-time PATH
+--      8.2 Table function body resolves via creation-time PATH
+--      8.3 current_schema / current_path in function body use invoker context
+--   9. DEFAULT_PATH conf (spark.sql.defaultPath)
+--      9.1 Explicit SET PATH overrides the conf
+--      9.2 SET PATH = DEFAULT_PATH expands to the conf value
+--      9.3 Invalid conf value rejected
+--  10. PATH disabled
+--      10.1 current_path() still resolves (regular builtin)
+--      10.2 SET PATH itself is rejected
+-- ============================================================================
+
+--SET spark.sql.path.enabled=true
+
+
+-- ============================================================================
+-- 1. Default path observability (no SET PATH issued)
+-- ============================================================================
+
+-- The session was opened with PATH enabled and no `SET PATH` issued, so the
+-- effective path is the spark-builtin default ordering with current_schema in
+-- the catalog slot.
+SELECT current_path();
+
+
+-- ============================================================================
+-- 2. SET PATH grammar
+-- ============================================================================
+
+-- 2.1 Literal schema entries; case preservation; backtick quoting -------------
+
+SET PATH = spark_catalog.default, system.builtin;
+SELECT current_path();
+
+-- Case is preserved exactly as typed.
+SET PATH = Spark_Catalog.Default, System.Builtin;
+SELECT current_path();
+
+-- Backtick-quoted identifiers that contain dots round-trip with quoting.
+SET PATH = spark_catalog.`sch.b`, system.builtin;
+SELECT current_path();
+
+-- Multi-level namespace (3+ parts) is accepted by the grammar. The stored entry
+-- is verified at the Scala layer (SetPathSuite) because the session catalog
+-- only supports single-part namespaces, so calling current_path() while a
+-- multi-level entry is on the path would surface that catalog limitation
+-- rather than the PATH grammar property under test here.
+
+SET PATH = DEFAULT_PATH;
+
+
+-- 2.2 DEFAULT_PATH shortcut ---------------------------------------------------
+
+SET PATH = DEFAULT_PATH;
+SELECT current_path();
+
+
+-- 2.3 SYSTEM_PATH shortcut ----------------------------------------------------
+--
+-- SYSTEM_PATH expands to the system-managed namespaces under the `system`
+-- catalog. Today that is just `system.builtin`; the shortcut is reserved for
+-- future system-managed schemas.
+
+SET PATH = SYSTEM_PATH;
+SELECT current_path();
+
+-- SYSTEM_PATH composes naturally with CURRENT_SCHEMA to give "system functions
+-- plus my working schema".
+USE spark_catalog.default;
+SET PATH = SYSTEM_PATH, CURRENT_SCHEMA;
+SELECT current_path();
+SET PATH = DEFAULT_PATH;
+
+
+-- 2.4 PATH keyword (append to live path) --------------------------------------
+
+SET PATH = spark_catalog.default, system.builtin;
+SET PATH = PATH, system.session;
+SELECT current_path();
+
+
+-- 2.5 current_schema / current_database shortcuts -----------------------------
+
+USE spark_catalog.default;
+SET PATH = current_schema, system.builtin;
+SELECT current_path();
+
+-- current_database is a SQL alias for current_schema.
+SET PATH = current_database, system.builtin;
+SELECT current_path();
+
+SET PATH = DEFAULT_PATH;
+
+
+-- ============================================================================
+-- 3. CURRENT_PATH() builtin
+-- ============================================================================
+
+-- 3.1 ANSI no-parens form equals current_path() ------------------------------
+
+SET PATH = spark_catalog.default, system.builtin;
+SELECT CURRENT_PATH = current_path() AS ansi_form_matches;
+
+
+-- 3.2 Argument-count validation ----------------------------------------------
+
+SELECT current_path(1);
+
+SET PATH = DEFAULT_PATH;
+
+
+-- ============================================================================
+-- 4. Static error conditions at SET PATH
+-- ============================================================================
+
+-- 4.1 Literal duplicate -------------------------------------------------------
+
+SET PATH = spark_catalog.default, spark_catalog.default;
+
+-- Case-insensitive duplicate is still flagged.
+SET PATH = spark_catalog.DEFAULT, spark_catalog.default;
+
+
+-- 4.2 DEFAULT_PATH expansion duplicate ----------------------------------------
+
+-- DEFAULT_PATH already contains system.builtin; listing it again is a duplicate
+-- after expansion.
+SET PATH = DEFAULT_PATH, system.builtin;
+
+
+-- 4.3 SYSTEM_PATH expansion duplicate -----------------------------------------
+
+SET PATH = SYSTEM_PATH, SYSTEM_PATH;
+
+
+-- 4.4 current_database vs current_schema cross-alias duplicate ----------------
+
+SET PATH = current_database, current_schema;
+
+
+-- 4.5 Single-part schema reference rejected -----------------------------------
+
+SET PATH = my_schema_no_catalog;
+
+
+-- ============================================================================
+-- 5. Routine resolution via PATH
+-- ============================================================================
+
+-- 5.1 Persistent scalar function follows PATH ---------------------------------
+
+CREATE SCHEMA sql_path_routines;
+CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7;
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin;
+SELECT pick();
+SET PATH = DEFAULT_PATH;
+
+
+-- 5.2 Persistent table function follows PATH ----------------------------------
+
+CREATE FUNCTION sql_path_routines.pick_tvf()
+RETURNS TABLE(val INT)
+RETURN SELECT 7 AS val;
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin;
+SELECT * FROM pick_tvf();
+SET PATH = DEFAULT_PATH;
+
+
+-- 5.3 First-match ordering across two schemas on PATH ------------------------
+
+CREATE SCHEMA sql_path_routines_b;
+CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11;
+
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin;
+SELECT pick() AS from_first_schema;
+SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin;
+SELECT pick() AS from_first_schema;
+SET PATH = DEFAULT_PATH;
+
+
+-- 5.4 Unqualified miss when schema is not on PATH -----------------------------
+
+SET PATH = spark_catalog.default, system.builtin;
+SELECT pick();
+
+-- Cleanup section 5.
+SET PATH = DEFAULT_PATH;
+DROP FUNCTION sql_path_routines.pick;
+DROP FUNCTION sql_path_routines.pick_tvf;
+DROP FUNCTION sql_path_routines_b.pick;
+DROP SCHEMA sql_path_routines;
+DROP SCHEMA sql_path_routines_b;
+
+
+-- ============================================================================
+-- 6. Relation resolution via PATH
+-- ============================================================================
+
+CREATE SCHEMA sql_path_relations_a;
+CREATE SCHEMA sql_path_relations_b;
+CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id;
+CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id;
+
+-- 6.1 First-match ordering ----------------------------------------------------
+
+SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin;
+SELECT id FROM tbl AS from_first_schema;
+SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin;
+SELECT id FROM tbl AS from_first_schema;
+
+
+-- 6.2 Unqualified miss when schema is not on PATH -----------------------------
+
+SET PATH = spark_catalog.default, system.builtin;
+SELECT id FROM tbl;
+
+-- Cleanup section 6.
+SET PATH = DEFAULT_PATH;
+DROP TABLE sql_path_relations_a.tbl;
+DROP TABLE sql_path_relations_b.tbl;
+DROP SCHEMA sql_path_relations_a;
+DROP SCHEMA sql_path_relations_b;
+
+
+-- ============================================================================
+-- 7. Persisted view frozen-path behavior
+-- ============================================================================
+
+CREATE SCHEMA sql_path_views_a;
+CREATE SCHEMA sql_path_views_b;
+CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id;
+CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id;
+
+-- 7.1 View body resolves via creation-time PATH (not invoker PATH) ------------
+
+SET PATH = spark_catalog.sql_path_views_a, system.builtin;
+CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t;
+
+-- Flip the live PATH; the view body's unqualified `frozen_t` must still
+-- resolve through the schema captured at CREATE VIEW (sql_path_views_a, id=1).
+-- A bare query against `frozen_t` from the session follows the LIVE PATH and
+-- returns the other table's row (id=2).
+SET PATH = spark_catalog.sql_path_views_b, system.builtin;
+SELECT id FROM frozen_t AS bare_lookup_uses_live_path;
+SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path;
+
+
+-- 7.2 current_schema / current_path in view body use invoker context ----------
+
+USE spark_catalog.sql_path_views_a;
+CREATE VIEW sql_path_views_a.v_ctx AS
+SELECT current_schema() AS cs, current_path() AS cp;
+
+USE spark_catalog.sql_path_views_b;
+SET PATH = DEFAULT_PATH;
+-- The view body re-evaluates current_schema() / current_path() on every
+-- invocation against the INVOKER's context, not the creator's. The result
+-- here must reflect sql_path_views_b (the invoker), not sql_path_views_a
+-- (the creator's schema at CREATE VIEW).
+SELECT cs, cp FROM sql_path_views_a.v_ctx;
+
+-- Cleanup section 7.
+USE spark_catalog.default;
+SET PATH = DEFAULT_PATH;
+DROP VIEW default.v_path_frozen;
+DROP VIEW sql_path_views_a.v_ctx;
+DROP TABLE sql_path_views_a.frozen_t;
+DROP TABLE sql_path_views_b.frozen_t;
+DROP SCHEMA sql_path_views_a;
+DROP SCHEMA sql_path_views_b;
+
+
+-- ============================================================================
+-- 8. SQL function frozen-path behavior
+-- ============================================================================
+
+CREATE SCHEMA sql_path_fn_a;
+CREATE SCHEMA sql_path_fn_b;
+CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id;
+CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id;
+
+-- 8.1 Scalar function body resolves via creation-time PATH --------------------
+
+SET PATH = spark_catalog.sql_path_fn_a, system.builtin;
+CREATE FUNCTION default.frozen_fn()
+RETURNS INT
+RETURN (SELECT MAX(id) FROM frozen_t);
+
+SET PATH = spark_catalog.sql_path_fn_b, system.builtin;
+SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path;
+SELECT default.frozen_fn() AS scalar_body_uses_frozen_path;
+
+
+-- 8.2 Table function body resolves via creation-time PATH ---------------------
+
+SET PATH = spark_catalog.sql_path_fn_a, system.builtin;
+CREATE FUNCTION default.frozen_tvf()
+RETURNS TABLE(id INT)
+RETURN SELECT MAX(id) AS id FROM frozen_t;
+
+SET PATH = spark_catalog.sql_path_fn_b, system.builtin;
+SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path;
+
+
+-- 8.3 current_schema / current_path in function body use invoker context -----
+
+USE spark_catalog.sql_path_fn_a;
+CREATE FUNCTION sql_path_fn_a.f_ctx()
+RETURNS STRING
+RETURN concat(current_schema(), '::', current_path());
+
+USE spark_catalog.sql_path_fn_b;
+SET PATH = DEFAULT_PATH;
+-- Like 7.2: current_schema() / current_path() in a SQL function body bind to
+-- the INVOKER's context, not the creator's.
+SELECT sql_path_fn_a.f_ctx() AS invoker_context;
+
+-- Cleanup section 8.
+USE spark_catalog.default;
+SET PATH = DEFAULT_PATH;
+DROP FUNCTION default.frozen_fn;
+DROP FUNCTION default.frozen_tvf;
+DROP FUNCTION sql_path_fn_a.f_ctx;
+DROP TABLE sql_path_fn_a.frozen_t;
+DROP TABLE sql_path_fn_b.frozen_t;
+DROP SCHEMA sql_path_fn_a;
+DROP SCHEMA sql_path_fn_b;
+
+
+-- ============================================================================
+-- 9. DEFAULT_PATH conf (spark.sql.defaultPath)
+-- ============================================================================
+--
+-- The conf's RHS is captured as a raw string by the SQL `SET key = value`
+-- form; keywords like `current_schema` and shortcut tokens like `SYSTEM_PATH`
+-- must be written WITHOUT backticks so the conf's SET-PATH-grammar validator
+-- recognizes them as path tokens rather than 1-part quoted identifiers.
+
+-- 9.1 Explicit SET PATH overrides the conf ------------------------------------
+
+SET spark.sql.defaultPath = system.session, system.builtin;
+SET PATH = system.builtin, system.session;
+SELECT current_path() AS explicit_set_path_wins_over_conf;
+SET PATH = DEFAULT_PATH;
+RESET spark.sql.defaultPath;
+
+
+-- 9.2 SET PATH = DEFAULT_PATH expands to the conf value -----------------------
+
+SET spark.sql.defaultPath = system.session, system.builtin, current_schema;
+USE spark_catalog.default;
+SET PATH = DEFAULT_PATH;
+SELECT current_path() AS default_path_expands_to_conf;
+RESET spark.sql.defaultPath;
+SET PATH = DEFAULT_PATH;
+
+
+-- 9.3 Invalid conf value rejected at SET time ---------------------------------
+
+SET spark.sql.defaultPath = this is not a path;
+
+-- The PATH keyword is not allowed in the conf value (it would create a cycle).
+SET spark.sql.defaultPath = PATH, system.builtin;
+
+
+-- ============================================================================
+-- 10. PATH disabled
+-- ============================================================================
+
+SET spark.sql.path.enabled = false;
+
+
+-- 10.1 current_path() still resolves (regular builtin) ------------------------
+
+SELECT current_path() IS NOT NULL AS has_path;
+
+
+-- 10.2 SET PATH itself is rejected --------------------------------------------
+
+SET PATH = spark_catalog.default;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
index 2e4eaa1f8f6ca..86cd70cfbf981 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
@@ -83,6 +83,19 @@ DROP TEMPORARY VARIABLE var1;
 SET VARIABLE title = 'Test variable in aggregate';
 SELECT (SELECT MAX(id) FROM RANGE(10) WHERE id < title) FROM VALUES 1, 2 AS t(title);
 
+SET VARIABLE title = 'Dropped struct variable -- field access vs qualified name';
+-- `session.a` is ambiguous: (a) 2-part qualified variable, or (b) field `a` of a 1-part
+-- variable `session`. Variable resolution tries (a) first via longest match, falls back to
+-- (b). With `session` declared as a struct, (b) succeeds. After the variable is dropped,
+-- both interpretations fail and the SELECT falls through to column resolution, which
+-- reports `UNRESOLVED_COLUMN`. Because either interpretation could have been intended,
+-- the variable error path (when reached) must dump the full SQL path -- see
+-- `VariableResolution.searchPathEntriesForError`.
+DECLARE OR REPLACE VARIABLE session STRUCT<a INT> = NAMED_STRUCT('a', 1);
+SELECT session.a;
+DROP TEMPORARY VARIABLE session;
+SELECT session.a;
+
 SET VARIABLE title = 'Test qualifiers - fail';
 DECLARE OR REPLACE VARIABLE builtin.var1 INT;
 DECLARE OR REPLACE VARIABLE system.sesion.var1 INT;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql
new file mode 100644
index 0000000000000..6b2fb51f4e6a9
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence-legacy.sql
@@ -0,0 +1,3 @@
+--IMPORT sql-udf-name-precedence.sql
+
+--SET spark.sql.legacy.allowUdfParameterToShadowParameterlessFunction=true
diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql
new file mode 100644
index 0000000000000..a96e98323a872
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf-name-precedence.sql
@@ -0,0 +1,96 @@
+-- Precedence between a SQL UDF parameter and other resolution candidates (column,
+-- parameterless built-in function, LCA, outer reference, session variable, nested UDF).
+
+CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x;
+CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x;
+
+-- UDF parameter resolves when no column conflict.
+SELECT identity_fn(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1);
+
+-- Column wins over UDF parameter.
+SELECT col_vs_param(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING)
+RETURNS STRING RETURN current_user;
+
+-- Parameterless function wins over UDF parameter (current_user). Comparison against
+-- `current_user()` keeps the golden stable across envs returning different user names.
+SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won;
+
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT)
+RETURNS STRING RETURN typeof(current_date);
+
+-- Parameterless function wins over UDF parameter (current_date): the body returns 'date',
+-- which would be 'int' if the parameter alias had won.
+SELECT paramless_vs_param_date(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT)
+RETURNS STRING RETURN typeof(current_time);
+
+-- Parameterless function wins over UDF parameter (current_time): the body returns a
+-- time-typed value, vs 'int' if the parameter alias had won.
+SELECT paramless_vs_param_time(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT)
+RETURNS INT RETURN grouping__id;
+
+-- Parameterless function (grouping__id) wins over UDF parameter. grouping__id outside of
+-- a GROUPING SETS context fails analysis; assert via the resulting error class rather
+-- than a value comparison.
+SELECT paramless_vs_param_grouping(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT)
+RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y));
+
+-- LCA wins over UDF parameter (1000 = LCA won, 43 = param won).
+SELECT lca_vs_param(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT)
+RETURNS INT RETURN (SELECT (SELECT x) FROM v1);
+
+-- Outer column wins over UDF parameter.
+SELECT outer_vs_param(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT)
+RETURNS INT RETURN (SELECT (SELECT x));
+
+-- UDF parameter is visible via outer reference when no other binding is in scope.
+SELECT outer_param_pure(42);
+
+DECLARE x = 999;
+
+-- UDF parameter wins over session variable.
+SELECT identity_fn(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x;
+CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x);
+
+-- Nested UDF only sees innermost scope: inner_fn resolves 'x' from session variable (999),
+-- not outer_fn parameter (42).
+SELECT outer_fn(42);
+
+CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING)
+RETURNS TABLE(c STRING) RETURN SELECT current_user AS c;
+
+-- Parameterless function wins over a same-named table UDF parameter. Note this is NOT the
+-- SQL_FUNCTION_PARAMETER_ALIAS_METADATA_KEY rule: a table UDF body references its parameter as
+-- an outer reference, so the function already wins via the pre-existing "function beats outer
+-- reference" precedence (the legacy flag does not change this). Kept as a regression guard.
+SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored');
+
+DROP TEMPORARY VARIABLE x;
+DROP VIEW v1;
+DROP TEMPORARY FUNCTION identity_fn;
+DROP TEMPORARY FUNCTION col_vs_param;
+DROP TEMPORARY FUNCTION paramless_vs_param;
+DROP TEMPORARY FUNCTION paramless_vs_param_date;
+DROP TEMPORARY FUNCTION paramless_vs_param_time;
+DROP TEMPORARY FUNCTION paramless_vs_param_grouping;
+DROP TEMPORARY FUNCTION lca_vs_param;
+DROP TEMPORARY FUNCTION outer_vs_param;
+DROP TEMPORARY FUNCTION outer_param_pure;
+DROP TEMPORARY FUNCTION inner_fn;
+DROP TEMPORARY FUNCTION outer_fn;
+DROP TEMPORARY FUNCTION tvf_paramless_vs_param;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index 0db835bc7e85d..62b9a1b8f614e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -137,6 +137,33 @@ SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb, 4326)) <> wkb
 -- Error handling: invalid SRID.
 SELECT COUNT(*) FROM geodata WHERE ST_GeomFromWKB(wkb, 1);
 
+---- ST_AsBinary
+
+-- 1. Driver-level queries.
+SELECT ST_AsBinary(NULL);
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')));
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'00000000013FF00000000000004000000000000000'), 'NDR'));
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'));
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040')));
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'00000000013FF00000000000004000000000000000'), 'NDR'));
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'));
+-- Error handling: invalid endianness.
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), '');
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'ABC');
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'big-endian');
+
+-- 2. Table-level queries.
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb)) <> wkb;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'NDR') <> wkb;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'XDR') = wkb;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb)) <> wkb;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb), 'NDR') <> wkb;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb), 'XDR') = wkb;
+-- Error handling: invalid endianness.
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), '') IS NOT NULL;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'ABC') IS NOT NULL;
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'big-endian') IS NOT NULL;
+
 ------ ST accessor expressions
 
 ---- ST_Srid
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
index ef1e612fd744a..b99913c680559 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
@@ -258,4 +258,20 @@ select * from (
 where t.c2 is not null;
 
 -- SPARK-43838: Subquery on single table with having clause
-SELECT c1, c2, (SELECT count(*) cnt FROM t1 t2 WHERE t1.c1 = t2.c1 HAVING cnt = 0) FROM t1
+SELECT c1, c2, (SELECT count(*) cnt FROM t1 t2 WHERE t1.c1 = t2.c1 HAVING cnt = 0) FROM t1;
+
+-- Outer star expansion in scalar subquery
+SELECT (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1);
+
+-- Outer struct star expansion in scalar subquery
+SELECT (SELECT t1.s.* FROM VALUES(2) AS t2(col1) LIMIT 1)
+FROM (SELECT named_struct('a', 1) AS s) AS t1;
+
+-- Untargeted star in subquery should NOT expand from outer scope
+SELECT (SELECT * FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1);
+
+-- Inner scope wins when star target matches both inner and outer scope
+SELECT (SELECT t1.* FROM (SELECT 3 AS col1) AS t1 LIMIT 1) FROM VALUES(1) AS t1(col1);
+
+-- Outer star expansion through a derived table wrapper
+SELECT (SELECT * FROM (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1)) FROM VALUES(1) AS t1(col1);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/time-bucket.sql b/sql/core/src/test/resources/sql-tests/inputs/time-bucket.sql
new file mode 100644
index 0000000000000..b012fd5330efd
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/time-bucket.sql
@@ -0,0 +1,255 @@
+-- time_bucket function tests
+
+-- Pin session timezone to UTC. With UTC as the session zone, TIMESTAMP (LTZ)
+-- bucketing produces the same results as TIMESTAMP_NTZ. The session-zone-aware
+-- behavior is exercised in a dedicated `SET TIME ZONE 'America/Los_Angeles'`
+-- section at the end of this file.
+SET TIME ZONE 'UTC';
+
+
+-- Error: bucket_size must be positive
+
+-- Zero literal (DT, YM MONTH, YM YEAR)
+SELECT time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00');
+SELECT time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00');
+SELECT time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00');
+
+-- Negative literal (DT, YM MONTH, YM YEAR)
+SELECT time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00');
+SELECT time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00');
+SELECT time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00');
+
+-- Foldable arithmetic producing zero or negative
+SELECT time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00');
+SELECT time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00');
+SELECT time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00');
+SELECT time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00');
+
+
+-- Error: argument types
+
+-- ts and origin must be the same TIMESTAMP flavor (both TIMESTAMP or both TIMESTAMP_NTZ)
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00');
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00');
+
+-- bucket_size must be an interval (not a string)
+SELECT time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00');
+
+-- ts must be TIMESTAMP or TIMESTAMP_NTZ (not DATE or string)
+SELECT time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15');
+SELECT time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00');
+
+-- origin must be TIMESTAMP or TIMESTAMP_NTZ (not DATE or string)
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01');
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00');
+
+
+-- Error: bucket_size and origin must be foldable
+
+-- Non-foldable bucket_size via column reference (DT and YM)
+SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' HOUR) tab(bs);
+SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' MONTH) tab(bs);
+
+-- Non-foldable bucket_size via scalar subquery (DT and YM)
+SELECT time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00');
+SELECT time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00');
+
+-- Non-foldable origin via column reference (DT and YM bucket)
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o);
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o);
+
+-- Non-foldable origin via scalar subquery
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'));
+
+
+-- Error: wrong number of arguments
+
+-- 1-arg (too few)
+SELECT time_bucket(INTERVAL '1' HOUR);
+
+-- 4-arg (too many)
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00');
+
+
+-- NULL propagation
+
+-- Typed NULL for bucket_size, ts, or origin
+SELECT time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00');
+SELECT time_bucket(INTERVAL '1' HOUR, NULL);
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL);
+
+-- Both ts and origin NULL
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, NULL);
+
+-- NULL ts with explicit typed origin drives ts retyping via the builder
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00');
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00');
+
+
+-- DayTimeInterval buckets: default (epoch) origin
+
+-- 15-minute bucket
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00');
+
+-- 1-hour bucket
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00');
+
+-- 7-day (weekly) bucket (epoch is Thursday, so buckets run Thu-Wed)
+SELECT time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00');
+
+-- Compound DayTimeInterval (1 day 30 minutes)
+SELECT time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00');
+
+-- 1-microsecond bucket (finest supported precision)
+SELECT time_bucket(INTERVAL '0.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456');
+
+-- DayTimeInterval bucket on TIMESTAMP_NTZ
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00');
+
+
+-- DayTimeInterval buckets: explicit origin
+
+-- Custom origin at :05 shifts the grid so ts 11:27 lands in [11:05, 12:05)
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00');
+
+-- ts exactly on a bucket boundary returns ts
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00');
+
+-- ts exactly equal to origin returns origin
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00');
+
+-- DT origin after ts
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00');
+
+-- DayTimeInterval 3-arg with TIMESTAMP_NTZ
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00');
+
+
+-- YearMonthInterval buckets: default (epoch) origin
+
+-- 1-month bucket
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00');
+
+-- 3-month (quarterly) bucket
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00');
+
+-- 1-year bucket
+SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2024-05-15 10:00:00');
+
+-- Compound YearMonthInterval (1 year 3 months = 15 months)
+SELECT time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00');
+
+-- YearMonthInterval bucket on TIMESTAMP_NTZ
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00');
+
+-- ts exactly on a bucket boundary
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00');
+
+
+-- YearMonthInterval buckets: explicit origin
+
+-- Origin on 15th aligns grid to the 15th of each month
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00');
+
+-- ts exactly equal to origin returns origin
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 10:00:00', TIMESTAMP '2024-03-15 10:00:00');
+
+-- End-of-month capping + step-back: origin Jan 31, 1-month bucket -> 2024-02-29 (leap year)
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00');
+
+-- Leap-year capping: origin Feb 29, 1-year bucket -> 2025-02-28 (non-leap target)
+SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00');
+
+-- YM origin after ts
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00');
+
+-- YearMonthInterval 3-arg with TIMESTAMP_NTZ and custom origin
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00');
+
+
+-- Pre-epoch timestamps and origins
+
+-- Pre-epoch ts, 1-day bucket
+SELECT time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00');
+
+-- Pre-epoch ts, 1-hour bucket
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '1969-12-31 23:30:00');
+
+-- Pre-epoch origin with post-epoch ts
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00');
+
+-- Pre-epoch ts, YearMonthInterval bucket
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00');
+
+
+-- Foldable expressions (bucket_size, origin, and ts are folded at analysis time)
+
+-- Foldable DayTimeInterval arithmetic in bucket_size
+SELECT time_bucket(INTERVAL '10' MINUTE + INTERVAL '5' MINUTE, TIMESTAMP '2024-06-20 09:47:00');
+SELECT time_bucket(INTERVAL '1' HOUR - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00');
+
+-- Foldable YearMonthInterval arithmetic in bucket_size
+SELECT time_bucket(INTERVAL '2' MONTH + INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00');
+
+-- Foldable arithmetic in origin
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '5' MINUTE);
+
+-- Foldable arithmetic in ts
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE);
+
+
+-- Column reference as ts
+
+-- DayTimeInterval bucket over TIMESTAMP column (with one NULL row)
+SELECT t, time_bucket(INTERVAL '1' HOUR, t) AS bucket
+  FROM VALUES (TIMESTAMP '2024-01-15 10:23:00'), (TIMESTAMP '2024-01-15 14:45:00'), (CAST(NULL AS TIMESTAMP)) tab(t)
+  ORDER BY t;
+
+-- DayTimeInterval bucket over TIMESTAMP_NTZ column
+SELECT t, time_bucket(INTERVAL '15' MINUTE, t) AS bucket
+  FROM VALUES (TIMESTAMP_NTZ '2024-01-15 10:23:00'), (TIMESTAMP_NTZ '2024-01-15 14:07:00') tab(t)
+  ORDER BY t;
+
+-- YearMonthInterval bucket over TIMESTAMP column
+SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket
+  FROM VALUES (TIMESTAMP '2024-03-15 10:23:00'), (TIMESTAMP '2024-06-01 00:00:00') tab(t)
+  ORDER BY t;
+
+
+-- Bucketing in a non-UTC session: TIMESTAMP (LTZ) values bucket in the session time
+-- zone, so monthly and daily boundaries land at local calendar boundaries across DST
+-- (America/Los_Angeles springs forward 2024-03-10 and falls back 2024-11-03). NTZ
+-- values bucket in UTC. This section is at the end of the file so no later test
+-- accidentally inherits the non-UTC zone.
+SET TIME ZONE 'America/Los_Angeles';
+SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-02-15 10:00:00'),
+    (TIMESTAMP '2024-03-15 10:00:00'),
+    (TIMESTAMP '2024-04-15 10:00:00') tab(t)
+  ORDER BY t;
+SELECT t, time_bucket(INTERVAL '1' MONTH, CAST(t AS TIMESTAMP_NTZ)) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-02-15 10:00:00'),
+    (TIMESTAMP '2024-03-15 10:00:00'),
+    (TIMESTAMP '2024-04-15 10:00:00') tab(t)
+  ORDER BY t;
+-- Daily bucket on LTZ across the spring-forward day: each ts buckets to local midnight.
+SELECT t, time_bucket(INTERVAL '1' DAY, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-03-09 12:00:00'),
+    (TIMESTAMP '2024-03-10 12:00:00'),
+    (TIMESTAMP '2024-03-11 12:00:00') tab(t)
+  ORDER BY t;
+-- Daily bucket on LTZ across the fall-back day (2024-11-03): each ts buckets to local
+-- midnight of its own day even though Nov 3 spans 25 UTC hours.
+SELECT t, time_bucket(INTERVAL '1' DAY, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-11-02 12:00:00'),
+    (TIMESTAMP '2024-11-03 12:00:00'),
+    (TIMESTAMP '2024-11-04 12:00:00') tab(t)
+  ORDER BY t;
+-- Compound day-time bucket (36h) on LTZ across the fall-back day.
+SELECT t, time_bucket(INTERVAL '36' HOUR, t, TIMESTAMP '2024-11-01 00:00:00') AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-11-05 11:30:00') tab(t);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql
index 6de22b8b7c3de..86efa8fa338b4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql
@@ -56,6 +56,11 @@ SELECT length('four') FROM t;
 SELECT length(date('1996-09-10')) FROM t;
 SELECT length(timestamp('1996-09-10 10:11:12.4')) FROM t;
 
+-- string to time
+SELECT '12:00:00' = TIME'12:00:00' FROM t;
+SELECT '12:00:01' > TIME'12:00:00' FROM t;
+SELECT time_trunc('HOUR', '12:34:56') FROM t;
+
 -- extract
 SELECT year( '1996-01-10') FROM t;
 SELECT month( '1996-01-10') FROM t;
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 36985a0ec628a..3d5db15a48113 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -707,7 +707,7 @@ struct<plan:string>
 -- !query output
 == Parsed Logical Plan ==
 'DescribeRelation false, [col_name#x, data_type#x, comment#x]
-+- 'UnresolvedTableOrView [t], DESCRIBE TABLE, true
++- 'UnresolvedTableOrView [t], DESCRIBE TABLE, true, QueryLike
 
 == Analyzed Logical Plan ==
 col_name: string, data_type: string, comment: string
diff --git a/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out b/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out
new file mode 100644
index 0000000000000..e410666845123
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/extract-value-nulltype-single-pass.sql.out
@@ -0,0 +1,7 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query schema
+struct<a:void>
+-- !query output
+NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out b/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out
index 0565edc99b95d..4d6cbb936d778 100644
--- a/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/extract-value-resolution-edge-cases.sql.out
@@ -37,3 +37,27 @@ DROP TABLE t1
 struct<>
 -- !query output
 
+
+
+-- !query
+SELECT col.a FROM (SELECT null AS col) t
+-- !query schema
+struct<a:void>
+-- !query output
+NULL
+
+
+-- !query
+SELECT col[0] FROM (SELECT null AS col) t
+-- !query schema
+struct<col[0]:void>
+-- !query output
+NULL
+
+
+-- !query
+SELECT col['key'] FROM (SELECT null AS col) t
+-- !query schema
+struct<col[key]:void>
+-- !query output
+NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out b/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out
index f685076f9f30c..df08b3837b553 100644
--- a/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/having-and-order-by-recursive-type-name-resolution.sql.out
@@ -427,3 +427,11 @@ ORDER BY col1.nums[0]
 struct<sum_val:int>
 -- !query output
 30
+
+
+-- !query
+SELECT NAMED_STRUCT('a', 1) AS col1 FROM VALUES (NULL) t (col1) GROUP BY col1 HAVING col1.a == 1
+-- !query schema
+struct<col1:struct<a:int>>
+-- !query output
+
diff --git a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
index 11bafb2cf63c9..b8af8dfea2211 100644
--- a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
@@ -1905,6 +1905,87 @@ struct<1:int>
 1
 
 
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+     nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k)
+-- !query schema
+struct<k:int,v1:string,v2:string,unq_k:int>
+-- !query output
+1	a	b	1
+
+
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+     nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k, nj1.k AS qual_nj1k, nj2.k AS qual_nj2k)
+-- !query schema
+struct<k:int,v1:string,v2:string,unq_k:int,qual_nj1k:int,qual_nj2k:int>
+-- !query output
+1	a	b	1	1	1
+
+
+-- !query
+WITH uj1(k, v1) AS (VALUES (1, 'a')),
+     uj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM uj1 JOIN uj2 USING (k),
+LATERAL (SELECT k AS unq_k, uj1.k AS qual_uj1k, uj2.k AS qual_uj2k)
+-- !query schema
+struct<k:int,v1:string,v2:string,unq_k:int,qual_uj1k:int,qual_uj2k:int>
+-- !query output
+1	a	b	1	1	1
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'))
+SELECT * FROM (SELECT k FROM cte1 ORDER BY v1) sub,
+LATERAL (SELECT v1 AS leaked)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`v1`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 107,
+    "stopIndex" : 108,
+    "fragment" : "v1"
+  } ]
+}
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'), (2, 'b'), (3, 'c'))
+SELECT * FROM (SELECT k FROM cte1 GROUP BY k) g,
+LATERAL (SELECT v1 AS leaked)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`v1`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 124,
+    "stopIndex" : 125,
+    "fragment" : "v1"
+  } ]
+}
+
+
 -- !query
 DROP VIEW t1
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/join-nearest-by.sql.out b/sql/core/src/test/resources/sql-tests/results/join-nearest-by.sql.out
new file mode 100644
index 0000000000000..81803d1396726
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/join-nearest-by.sql.out
@@ -0,0 +1,433 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE VIEW users(user_id, score) AS VALUES (1, 10.0), (2, 20.0), (3, 30.0)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE VIEW products(product, pscore) AS VALUES ('A', 11.0), ('B', 22.0), ('C', 5.0)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,product:string>
+-- !query output
+1	A
+2	B
+3	B
+
+
+-- !query
+SELECT *
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,score:decimal(3,1),product:string,pscore:decimal(3,1)>
+-- !query output
+1	10.0	A	11.0
+2	20.0	B	22.0
+3	30.0	B	22.0
+
+
+-- !query
+SELECT *
+FROM users u LEFT OUTER JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,score:decimal(3,1),product:string,pscore:decimal(3,1)>
+-- !query output
+1	10.0	A	11.0
+2	20.0	B	22.0
+3	30.0	B	22.0
+
+
+-- !query
+SELECT u.user_id, p.product, p.pscore
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,product:string,pscore:decimal(3,1)>
+-- !query output
+1	A	11.0
+1	C	5.0
+2	A	11.0
+2	B	22.0
+3	A	11.0
+3	B	22.0
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u INNER JOIN products p
+  EXACT NEAREST BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,product:string>
+-- !query output
+1	A
+2	B
+3	B
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u LEFT OUTER JOIN (SELECT * FROM products WHERE false) p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,product:string>
+-- !query output
+1	NULL
+2	NULL
+3	NULL
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u INNER JOIN (SELECT * FROM products WHERE false) p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,product:string>
+-- !query output
+
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u INNER JOIN products p
+  APPROX NEAREST 1 BY DISTANCE abs(u.score - p.pscore)
+-- !query schema
+struct<user_id:int,product:string>
+-- !query output
+1	A
+2	B
+3	B
+
+
+-- !query
+SELECT a.user_id AS a_id, b.user_id AS b_id
+FROM users a JOIN users b
+  APPROX NEAREST 1 BY DISTANCE abs(a.score - b.score)
+ORDER BY a.user_id, b.user_id
+-- !query schema
+struct<a_id:int,b_id:int>
+-- !query output
+1	1
+2	2
+3	3
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u RIGHT OUTER JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "joinType" : "RIGHT OUTER",
+    "supported" : "'INNER', 'LEFT OUTER'"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 126,
+    "fragment" : "RIGHT OUTER JOIN products p\n  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 0 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "max" : "100000",
+    "min" : "1",
+    "numResults" : "0"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 114,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 0 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 100001 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "max" : "100000",
+    "min" : "1",
+    "numResults" : "100001"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 119,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 100001 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY map(u.score, p.pscore)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "NEAREST_BY_JOIN.NON_ORDERABLE_RANKING_EXPRESSION",
+  "sqlState" : "42604",
+  "messageParameters" : {
+    "expression" : "\"map(score, pscore)\"",
+    "type" : "\"MAP<DECIMAL(3,1), DECIMAL(3,1)>\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 112,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 1 BY SIMILARITY map(u.score, p.pscore)"
+  } ]
+}
+
+
+-- !query
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    EXACT NEAREST 1 BY SIMILARITY rand() + p.pscore
+)
+-- !query schema
+struct<num_rows:bigint>
+-- !query output
+3
+
+
+-- !query
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    APPROX NEAREST 1 BY SIMILARITY rand() + p.pscore
+)
+-- !query schema
+struct<num_rows:bigint>
+-- !query output
+3
+
+
+-- !query
+SELECT COUNT(*) AS num_rows
+FROM (
+  SELECT u.user_id, p.product
+  FROM users u JOIN products p
+    APPROX NEAREST 2 BY DISTANCE rand() + p.pscore
+)
+-- !query schema
+struct<num_rows:bigint>
+-- !query output
+6
+
+
+-- !query
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY rand(0) + p.pscore
+-- !query schema
+struct<plan:string>
+-- !query output
+== Physical Plan ==
+AdaptiveSparkPlan isFinalPlan=false
++- Project [user_id#x, product#x]
+   +- Generate inline(__nearest_matches__#x), [user_id#x], false, [product#x, pscore#x]
+      +- Filter ((size(__nearest_matches__#x, false) > 0) AND isnotnull(__nearest_matches__#x))
+         +- SortAggregate(key=[__qid#x], functions=[first(user_id#x, false), max_by(named_struct(product, product#x, pscore, pscore#x), __ranking__#x, 1, false, 0, 0)])
+            +- Sort [__qid#x ASC NULLS FIRST], false, 0
+               +- Exchange hashpartitioning(__qid#x, 4), ENSURE_REQUIREMENTS, [plan_id=x]
+                  +- SortAggregate(key=[__qid#x], functions=[partial_first(user_id#x, false), partial_max_by(named_struct(product, product#x, pscore, pscore#x), __ranking__#x, 1, false, 0, 0)])
+                     +- Sort [__qid#x ASC NULLS FIRST], false, 0
+                        +- Project [user_id#x, __qid#x, product#x, pscore#x, (rand(0) + cast(pscore#x as double)) AS __ranking__#x]
+                           +- BroadcastNestedLoopJoin BuildLeft, Inner
+                              :- BroadcastExchange IdentityBroadcastMode, [plan_id=x]
+                              :  +- Project [col1#x AS user_id#x, uuid(Some(x)) AS __qid#x]
+                              :     +- LocalTableScan [col1#x, col2#x]
+                              +- Project [col1#x AS product#x, col2#x AS pscore#x]
+                                 +- LocalTableScan [col1#x, col2#x]
+
+
+-- !query
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+WHERE u.user_id > 1
+-- !query schema
+struct<plan:string>
+-- !query output
+== Physical Plan ==
+AdaptiveSparkPlan isFinalPlan=false
++- Project [user_id#x, product#x]
+   +- Generate inline(__nearest_matches__#x), [user_id#x], false, [product#x, pscore#x]
+      +- Filter ((size(__nearest_matches__#x, false) > 0) AND isnotnull(__nearest_matches__#x))
+         +- SortAggregate(key=[__qid#x], functions=[first(user_id#x, false), min_by(named_struct(product, product#x, pscore, pscore#x), abs((score#x - pscore#x)), 2, true, 0, 0)])
+            +- Sort [__qid#x ASC NULLS FIRST], false, 0
+               +- Exchange hashpartitioning(__qid#x, 4), ENSURE_REQUIREMENTS, [plan_id=x]
+                  +- SortAggregate(key=[__qid#x], functions=[partial_first(user_id#x, false), partial_min_by(named_struct(product, product#x, pscore, pscore#x), abs((score#x - pscore#x)), 2, true, 0, 0)])
+                     +- Sort [__qid#x ASC NULLS FIRST], false, 0
+                        +- BroadcastNestedLoopJoin BuildRight, Inner
+                           :- Filter (user_id#x > 1)
+                           :  +- Project [col1#x AS user_id#x, col2#x AS score#x, uuid(Some(x)) AS __qid#x]
+                           :     +- LocalTableScan [col1#x, col2#x]
+                           +- BroadcastExchange IdentityBroadcastMode, [plan_id=x]
+                              +- Project [col1#x AS product#x, col2#x AS pscore#x]
+                                 +- LocalTableScan [col1#x, col2#x]
+
+
+-- !query
+EXPLAIN
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+WHERE p.product != 'C'
+-- !query schema
+struct<plan:string>
+-- !query output
+== Physical Plan ==
+AdaptiveSparkPlan isFinalPlan=false
++- Project [user_id#x, product#x]
+   +- Filter (isnotnull(product#x) AND NOT (product#x = C))
+      +- Generate inline(__nearest_matches__#x), [user_id#x], false, [product#x, pscore#x]
+         +- Filter ((size(__nearest_matches__#x, false) > 0) AND isnotnull(__nearest_matches__#x))
+            +- SortAggregate(key=[__qid#x], functions=[first(user_id#x, false), min_by(named_struct(product, product#x, pscore, pscore#x), abs((score#x - pscore#x)), 2, true, 0, 0)])
+               +- Sort [__qid#x ASC NULLS FIRST], false, 0
+                  +- Exchange hashpartitioning(__qid#x, 4), ENSURE_REQUIREMENTS, [plan_id=x]
+                     +- SortAggregate(key=[__qid#x], functions=[partial_first(user_id#x, false), partial_min_by(named_struct(product, product#x, pscore, pscore#x), abs((score#x - pscore#x)), 2, true, 0, 0)])
+                        +- Sort [__qid#x ASC NULLS FIRST], false, 0
+                           +- BroadcastNestedLoopJoin BuildRight, Inner
+                              :- Project [col1#x AS user_id#x, col2#x AS score#x, uuid(Some(x)) AS __qid#x]
+                              :  +- LocalTableScan [col1#x, col2#x]
+                              +- BroadcastExchange IdentityBroadcastMode, [plan_id=x]
+                                 +- Project [col1#x AS product#x, col2#x AS pscore#x]
+                                    +- LocalTableScan [col1#x, col2#x]
+
+
+-- !query
+SET spark.sql.crossJoin.enabled = false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.crossJoin.enabled	false
+
+
+-- !query
+SELECT u.user_id, p.product
+FROM users u JOIN products p
+  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED",
+  "sqlState" : "42604",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 42,
+    "stopIndex" : 114,
+    "fragment" : "JOIN products p\n  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)"
+  } ]
+}
+
+
+-- !query
+SET spark.sql.crossJoin.enabled = true
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.crossJoin.enabled	true
+
+
+-- !query
+CREATE OR REPLACE TEMP VIEW tied_products(product, pscore)
+  AS VALUES ('A', 10.0), ('B', 10.0), ('C', 10.0)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT u.user_id, COUNT(*) AS num_matches
+FROM users u JOIN tied_products p
+  APPROX NEAREST 2 BY DISTANCE abs(u.score - p.pscore)
+GROUP BY u.user_id
+ORDER BY u.user_id
+-- !query schema
+struct<user_id:int,num_matches:bigint>
+-- !query output
+1	2
+2	2
+3	2
+
+
+-- !query
+DROP VIEW tied_products
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW users
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW products
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
index 11a103e6cc0e6..6bcbdd2840f90 100644
--- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
@@ -15,6 +15,7 @@ AND	true
 ANTI	false
 ANY	true
 ANY_VALUE	false
+APPROX	false
 ARCHIVE	false
 ARRAY	false
 AS	true
@@ -24,6 +25,7 @@ AT	false
 ATOMIC	false
 AUTHORIZATION	true
 BEGIN	false
+BERNOULLI	false
 BETWEEN	false
 BIGINT	false
 BINARY	false
@@ -110,6 +112,7 @@ DETERMINISTIC	false
 DFS	false
 DIRECTORIES	false
 DIRECTORY	false
+DISTANCE	false
 DISTINCT	true
 DISTRIBUTE	false
 DIV	false
@@ -123,6 +126,7 @@ ENFORCED	false
 ESCAPE	true
 ESCAPED	false
 EVOLUTION	false
+EXACT	false
 EXCEPT	true
 EXCHANGE	false
 EXCLUDE	false
@@ -243,6 +247,7 @@ NAMESPACES	false
 NANOSECOND	false
 NANOSECONDS	false
 NATURAL	true
+NEAREST	false
 NEXT	false
 NO	false
 NONE	false
@@ -331,6 +336,7 @@ SET	false
 SETS	false
 SHORT	false
 SHOW	false
+SIMILARITY	false
 SINGLE	false
 SKEWED	false
 SMALLINT	false
@@ -353,6 +359,7 @@ STRUCT	false
 SUBSTR	false
 SUBSTRING	false
 SYNC	false
+SYSTEM	false
 SYSTEM_PATH	false
 SYSTEM_TIME	false
 SYSTEM_VERSION	false
diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
index 1a7db9df073f4..a010343264469 100644
--- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
@@ -15,6 +15,7 @@ AND	false
 ANTI	false
 ANY	false
 ANY_VALUE	false
+APPROX	false
 ARCHIVE	false
 ARRAY	false
 AS	false
@@ -24,6 +25,7 @@ AT	false
 ATOMIC	false
 AUTHORIZATION	false
 BEGIN	false
+BERNOULLI	false
 BETWEEN	false
 BIGINT	false
 BINARY	false
@@ -110,6 +112,7 @@ DETERMINISTIC	false
 DFS	false
 DIRECTORIES	false
 DIRECTORY	false
+DISTANCE	false
 DISTINCT	false
 DISTRIBUTE	false
 DIV	false
@@ -123,6 +126,7 @@ ENFORCED	false
 ESCAPE	false
 ESCAPED	false
 EVOLUTION	false
+EXACT	false
 EXCEPT	false
 EXCHANGE	false
 EXCLUDE	false
@@ -243,6 +247,7 @@ NAMESPACES	false
 NANOSECOND	false
 NANOSECONDS	false
 NATURAL	false
+NEAREST	false
 NEXT	false
 NO	false
 NONE	false
@@ -331,6 +336,7 @@ SET	false
 SETS	false
 SHORT	false
 SHOW	false
+SIMILARITY	false
 SINGLE	false
 SKEWED	false
 SMALLINT	false
@@ -353,6 +359,7 @@ STRUCT	false
 SUBSTR	false
 SUBSTRING	false
 SYNC	false
+SYSTEM	false
 SYSTEM_PATH	false
 SYSTEM_TIME	false
 SYSTEM_VERSION	false
diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
index 1a7db9df073f4..a010343264469 100644
--- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
@@ -15,6 +15,7 @@ AND	false
 ANTI	false
 ANY	false
 ANY_VALUE	false
+APPROX	false
 ARCHIVE	false
 ARRAY	false
 AS	false
@@ -24,6 +25,7 @@ AT	false
 ATOMIC	false
 AUTHORIZATION	false
 BEGIN	false
+BERNOULLI	false
 BETWEEN	false
 BIGINT	false
 BINARY	false
@@ -110,6 +112,7 @@ DETERMINISTIC	false
 DFS	false
 DIRECTORIES	false
 DIRECTORY	false
+DISTANCE	false
 DISTINCT	false
 DISTRIBUTE	false
 DIV	false
@@ -123,6 +126,7 @@ ENFORCED	false
 ESCAPE	false
 ESCAPED	false
 EVOLUTION	false
+EXACT	false
 EXCEPT	false
 EXCHANGE	false
 EXCLUDE	false
@@ -243,6 +247,7 @@ NAMESPACES	false
 NANOSECOND	false
 NANOSECONDS	false
 NATURAL	false
+NEAREST	false
 NEXT	false
 NO	false
 NONE	false
@@ -331,6 +336,7 @@ SET	false
 SETS	false
 SHORT	false
 SHOW	false
+SIMILARITY	false
 SINGLE	false
 SKEWED	false
 SMALLINT	false
@@ -353,6 +359,7 @@ STRUCT	false
 SUBSTR	false
 SUBSTRING	false
 SYNC	false
+SYSTEM	false
 SYSTEM_PATH	false
 SYSTEM_TIME	false
 SYSTEM_VERSION	false
diff --git a/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out
new file mode 100644
index 0000000000000..c47ba461ba6cc
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence-legacy.sql.out
@@ -0,0 +1,131 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_user FROM v_user
+-- !query schema
+struct<current_user:string>
+-- !query output
+admin.admin
+
+
+-- !query
+SELECT current_time FROM v_time
+-- !query schema
+struct<current_time:int>
+-- !query output
+0
+
+
+-- !query
+SELECT 'abc' AS current_user, current_user = current_user() AS function_won
+-- !query schema
+struct<current_user:string,function_won:boolean>
+-- !query output
+abc	true
+
+
+-- !query
+SELECT (SELECT current_user) = current_user() AS function_won FROM v_user
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+DECLARE current_user = 'abc'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_user, current_user FROM v_user
+-- !query schema
+struct<current_user:string,current_user:string>
+-- !query output
+admin.admin	admin.admin
+
+
+-- !query
+DROP TEMPORARY VARIABLE current_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_date)
+SELECT typeof((SELECT current_date)) FROM t1
+-- !query schema
+struct<typeof(scalarsubquery()):string>
+-- !query output
+date
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_timestamp)
+SELECT typeof((SELECT current_timestamp)) FROM t1
+-- !query schema
+struct<typeof(scalarsubquery()):string>
+-- !query output
+timestamp
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS user)
+SELECT (SELECT user) = current_user() AS function_won FROM t1
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS session_user)
+SELECT (SELECT session_user) = current_user() AS function_won FROM t1
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user))
+-- !query schema
+struct<typeof(grouping_id()):string>
+-- !query output
+bigint
+
+
+-- !query
+DROP VIEW v_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW v_time
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out
new file mode 100644
index 0000000000000..c47ba461ba6cc
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/parameterless-function-name-precedence.sql.out
@@ -0,0 +1,131 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_user AS SELECT 'admin.admin' AS current_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v_time AS SELECT CAST(0 AS INT) AS current_time
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_user FROM v_user
+-- !query schema
+struct<current_user:string>
+-- !query output
+admin.admin
+
+
+-- !query
+SELECT current_time FROM v_time
+-- !query schema
+struct<current_time:int>
+-- !query output
+0
+
+
+-- !query
+SELECT 'abc' AS current_user, current_user = current_user() AS function_won
+-- !query schema
+struct<current_user:string,function_won:boolean>
+-- !query output
+abc	true
+
+
+-- !query
+SELECT (SELECT current_user) = current_user() AS function_won FROM v_user
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+DECLARE current_user = 'abc'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_user, current_user FROM v_user
+-- !query schema
+struct<current_user:string,current_user:string>
+-- !query output
+admin.admin	admin.admin
+
+
+-- !query
+DROP TEMPORARY VARIABLE current_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_date)
+SELECT typeof((SELECT current_date)) FROM t1
+-- !query schema
+struct<typeof(scalarsubquery()):string>
+-- !query output
+date
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS current_timestamp)
+SELECT typeof((SELECT current_timestamp)) FROM t1
+-- !query schema
+struct<typeof(scalarsubquery()):string>
+-- !query output
+timestamp
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS user)
+SELECT (SELECT user) = current_user() AS function_won FROM t1
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+WITH t1 AS (SELECT 1 AS session_user)
+SELECT (SELECT session_user) = current_user() AS function_won FROM t1
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT typeof(grouping__id) FROM v_user GROUP BY current_user GROUPING SETS ((current_user))
+-- !query schema
+struct<typeof(grouping_id()):string>
+-- !query output
+bigint
+
+
+-- !query
+DROP VIEW v_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW v_time
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out
new file mode 100644
index 0000000000000..20d74b1552582
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out
@@ -0,0 +1,1234 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+system.builtin,system.session,spark_catalog.default
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+spark_catalog.default,system.builtin
+
+
+-- !query
+SET PATH = Spark_Catalog.Default, System.Builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+Spark_Catalog.Default,System.Builtin
+
+
+-- !query
+SET PATH = spark_catalog.`sch.b`, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+spark_catalog.`sch.b`,system.builtin
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+system.builtin,system.session,spark_catalog.default
+
+
+-- !query
+SET PATH = SYSTEM_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+system.builtin
+
+
+-- !query
+USE spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = SYSTEM_PATH, CURRENT_SCHEMA
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+system.builtin,spark_catalog.default
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = PATH, system.session
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+spark_catalog.default,system.builtin,system.session
+
+
+-- !query
+USE spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = current_schema, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+spark_catalog.default,system.builtin
+
+
+-- !query
+SET PATH = current_database, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path()
+-- !query schema
+struct<current_path():string>
+-- !query output
+spark_catalog.default,system.builtin
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT CURRENT_PATH = current_path() AS ansi_form_matches
+-- !query schema
+struct<ansi_form_matches:boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT current_path(1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "1",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "0",
+    "functionName" : "`current_path`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 22,
+    "fragment" : "current_path(1)"
+  } ]
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.default, spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "spark_catalog.default"
+  }
+}
+
+
+-- !query
+SET PATH = spark_catalog.DEFAULT, spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "spark_catalog.default"
+  }
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH, system.builtin
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "system.builtin"
+  }
+}
+
+
+-- !query
+SET PATH = SYSTEM_PATH, SYSTEM_PATH
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "system.builtin"
+  }
+}
+
+
+-- !query
+SET PATH = current_database, current_schema
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "DUPLICATE_SQL_PATH_ENTRY",
+  "sqlState" : "42732",
+  "messageParameters" : {
+    "pathEntry" : "current_schema"
+  }
+}
+
+
+-- !query
+SET PATH = my_schema_no_catalog
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "qualifiedName" : "my_schema_no_catalog"
+  }
+}
+
+
+-- !query
+CREATE SCHEMA sql_path_routines
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT pick()
+-- !query schema
+struct<spark_catalog.sql_path_routines.pick():int>
+-- !query output
+7
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION sql_path_routines.pick_tvf()
+RETURNS TABLE(val INT)
+RETURN SELECT 7 AS val
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM pick_tvf()
+-- !query schema
+struct<val:int>
+-- !query output
+7
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_routines_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT pick() AS from_first_schema
+-- !query schema
+struct<from_first_schema:int>
+-- !query output
+7
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT pick() AS from_first_schema
+-- !query schema
+struct<from_first_schema:int>
+-- !query output
+11
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT pick()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_ROUTINE",
+  "sqlState" : "42883",
+  "messageParameters" : {
+    "routineName" : "`pick`",
+    "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 13,
+    "fragment" : "pick()"
+  } ]
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION sql_path_routines.pick
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION sql_path_routines.pick_tvf
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION sql_path_routines_b.pick
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_routines
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_routines_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_relations_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_relations_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT id FROM tbl AS from_first_schema
+-- !query schema
+struct<id:int>
+-- !query output
+1
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT id FROM tbl AS from_first_schema
+-- !query schema
+struct<id:int>
+-- !query output
+2
+
+
+-- !query
+SET PATH = spark_catalog.default, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT id FROM tbl
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "TABLE_OR_VIEW_NOT_FOUND",
+  "sqlState" : "42P01",
+  "messageParameters" : {
+    "relationName" : "`tbl`",
+    "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 16,
+    "stopIndex" : 18,
+    "fragment" : "tbl"
+  } ]
+}
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE sql_path_relations_a.tbl
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE sql_path_relations_b.tbl
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_relations_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_relations_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_views_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_views_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_views_a, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_views_b, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT id FROM frozen_t AS bare_lookup_uses_live_path
+-- !query schema
+struct<id:int>
+-- !query output
+2
+
+
+-- !query
+SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path
+-- !query schema
+struct<id:int>
+-- !query output
+1
+
+
+-- !query
+USE spark_catalog.sql_path_views_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE VIEW sql_path_views_a.v_ctx AS
+SELECT current_schema() AS cs, current_path() AS cp
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+USE spark_catalog.sql_path_views_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT cs, cp FROM sql_path_views_a.v_ctx
+-- !query schema
+struct<cs:string,cp:string>
+-- !query output
+sql_path_views_b	system.builtin,system.session,spark_catalog.sql_path_views_b
+
+
+-- !query
+USE spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW default.v_path_frozen
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW sql_path_views_a.v_ctx
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE sql_path_views_a.frozen_t
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE sql_path_views_b.frozen_t
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_views_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_views_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_fn_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE SCHEMA sql_path_fn_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_a, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION default.frozen_fn()
+RETURNS INT
+RETURN (SELECT MAX(id) FROM frozen_t)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_b, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path
+-- !query schema
+struct<max(id):int>
+-- !query output
+20
+
+
+-- !query
+SELECT default.frozen_fn() AS scalar_body_uses_frozen_path
+-- !query schema
+struct<scalar_body_uses_frozen_path:int>
+-- !query output
+10
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_a, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION default.frozen_tvf()
+RETURNS TABLE(id INT)
+RETURN SELECT MAX(id) AS id FROM frozen_t
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = spark_catalog.sql_path_fn_b, system.builtin
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path
+-- !query schema
+struct<id:int>
+-- !query output
+10
+
+
+-- !query
+USE spark_catalog.sql_path_fn_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION sql_path_fn_a.f_ctx()
+RETURNS STRING
+RETURN concat(current_schema(), '::', current_path())
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+USE spark_catalog.sql_path_fn_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT sql_path_fn_a.f_ctx() AS invoker_context
+-- !query schema
+struct<invoker_context:string>
+-- !query output
+sql_path_fn_b::system.builtin,system.session,spark_catalog.sql_path_fn_b
+
+
+-- !query
+USE spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION default.frozen_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION default.frozen_tvf
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION sql_path_fn_a.f_ctx
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE sql_path_fn_a.frozen_t
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE sql_path_fn_b.frozen_t
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_fn_a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP SCHEMA sql_path_fn_b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET spark.sql.defaultPath = system.session, system.builtin
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.defaultPath	system.session, system.builtin
+
+
+-- !query
+SET PATH = system.builtin, system.session
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path() AS explicit_set_path_wins_over_conf
+-- !query schema
+struct<explicit_set_path_wins_over_conf:string>
+-- !query output
+system.builtin,system.session
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+RESET spark.sql.defaultPath
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET spark.sql.defaultPath = system.session, system.builtin, current_schema
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.defaultPath	system.session, system.builtin, current_schema
+
+
+-- !query
+USE spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT current_path() AS default_path_expands_to_conf
+-- !query schema
+struct<default_path_expands_to_conf:string>
+-- !query output
+system.session,system.builtin,spark_catalog.default
+
+
+-- !query
+RESET spark.sql.defaultPath
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET PATH = DEFAULT_PATH
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SET spark.sql.defaultPath = this is not a path
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT",
+  "sqlState" : "22022",
+  "messageParameters" : {
+    "confName" : "spark.sql.defaultPath",
+    "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).",
+    "confValue" : "this is not a path"
+  }
+}
+
+
+-- !query
+SET spark.sql.defaultPath = PATH, system.builtin
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT",
+  "sqlState" : "22022",
+  "messageParameters" : {
+    "confName" : "spark.sql.defaultPath",
+    "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).",
+    "confValue" : "PATH, system.builtin"
+  }
+}
+
+
+-- !query
+SET spark.sql.path.enabled = false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.path.enabled	false
+
+
+-- !query
+SELECT current_path() IS NOT NULL AS has_path
+-- !query schema
+struct<has_path:boolean>
+-- !query output
+true
+
+
+-- !query
+SET PATH = spark_catalog.default
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED",
+  "sqlState" : "0A000",
+  "messageParameters" : {
+    "config" : "spark.sql.path.enabled"
+  }
+}
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
index de8d6743fc761..3357f2e526305 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
@@ -561,6 +561,60 @@ struct<scalarsubquery(title):bigint>
 1
 
 
+-- !query
+SET VARIABLE title = 'Dropped struct variable -- field access vs qualified name'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DECLARE OR REPLACE VARIABLE session STRUCT<a INT> = NAMED_STRUCT('a', 1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT session.a
+-- !query schema
+struct<a:int>
+-- !query output
+1
+
+
+-- !query
+DROP TEMPORARY VARIABLE session
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT session.a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`session`.`a`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 16,
+    "fragment" : "session.a"
+  } ]
+}
+
+
 -- !query
 SET VARIABLE title = 'Test qualifiers - fail'
 -- !query schema
@@ -579,9 +633,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`system`.`session`",
+    "searchPath" : "[`system`.`session`]",
     "variableName" : "`builtin`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 29,
+    "stopIndex" : 40,
+    "fragment" : "builtin.var1"
+  } ]
 }
 
 
@@ -595,9 +656,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`system`.`session`",
+    "searchPath" : "[`system`.`session`]",
     "variableName" : "`system`.`sesion`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 29,
+    "stopIndex" : 46,
+    "fragment" : "system.sesion.var1"
+  } ]
 }
 
 
@@ -611,9 +679,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`system`.`session`",
+    "searchPath" : "[`system`.`session`]",
     "variableName" : "`sys`.`session`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 29,
+    "stopIndex" : 44,
+    "fragment" : "sys.session.var1"
+  } ]
 }
 
 
@@ -723,9 +798,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`SYSTEM`.`SESSION`",
+    "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]",
     "variableName" : "`ses`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 14,
+    "stopIndex" : 21,
+    "fragment" : "ses.var1"
+  } ]
 }
 
 
@@ -739,9 +821,16 @@ org.apache.spark.sql.AnalysisException
   "errorClass" : "UNRESOLVED_VARIABLE",
   "sqlState" : "42883",
   "messageParameters" : {
-    "searchPath" : "`SYSTEM`.`SESSION`",
+    "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]",
     "variableName" : "`builtn`.`session`.`var1`"
-  }
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 14,
+    "stopIndex" : 32,
+    "fragment" : "builtn.session.var1"
+  } ]
 }
 
 
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out
new file mode 100644
index 0000000000000..92d2370e2b980
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence-legacy.sql.out
@@ -0,0 +1,327 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query schema
+struct<identity_fn(42):int>
+-- !query output
+42
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT col_vs_param(42)
+-- !query schema
+struct<col_vs_param(42):int>
+-- !query output
+1
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING)
+RETURNS STRING RETURN current_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+false
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT)
+RETURNS STRING RETURN typeof(current_date)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param_date(42)
+-- !query schema
+struct<paramless_vs_param_date(42):string>
+-- !query output
+int
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT)
+RETURNS STRING RETURN typeof(current_time)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param_time(42)
+-- !query schema
+struct<paramless_vs_param_time(42):string>
+-- !query output
+int
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT)
+RETURNS INT RETURN grouping__id
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param_grouping(42)
+-- !query schema
+struct<paramless_vs_param_grouping(42):int>
+-- !query output
+42
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT)
+RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y))
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT lca_vs_param(42)
+-- !query schema
+struct<lca_vs_param(42):int>
+-- !query output
+1000
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT)
+RETURNS INT RETURN (SELECT (SELECT x) FROM v1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT outer_vs_param(42)
+-- !query schema
+struct<outer_vs_param(42):int>
+-- !query output
+1
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT)
+RETURNS INT RETURN (SELECT (SELECT x))
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT outer_param_pure(42)
+-- !query schema
+struct<outer_param_pure(42):int>
+-- !query output
+42
+
+
+-- !query
+DECLARE x = 999
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query schema
+struct<identity_fn(42):int>
+-- !query output
+42
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT outer_fn(42)
+-- !query schema
+struct<outer_fn(42):int>
+-- !query output
+999
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING)
+RETURNS TABLE(c STRING) RETURN SELECT current_user AS c
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored')
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+DROP TEMPORARY VARIABLE x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW v1
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION identity_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION col_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_date
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_time
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_grouping
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION lca_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_param_pure
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION inner_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION tvf_paramless_vs_param
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out
new file mode 100644
index 0000000000000..2baca91aa987b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/sql-udf-name-precedence.sql.out
@@ -0,0 +1,360 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION identity_fn(x INT) RETURNS INT RETURN x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query schema
+struct<identity_fn(42):int>
+-- !query output
+42
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION col_vs_param(x INT) RETURNS INT RETURN (SELECT x FROM v1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT col_vs_param(42)
+-- !query schema
+struct<col_vs_param(42):int>
+-- !query output
+1
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param(current_user STRING)
+RETURNS STRING RETURN current_user
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param('should_be_ignored') = current_user() AS function_won
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_date(current_date INT)
+RETURNS STRING RETURN typeof(current_date)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param_date(42)
+-- !query schema
+struct<paramless_vs_param_date(42):string>
+-- !query output
+date
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_time(current_time INT)
+RETURNS STRING RETURN typeof(current_time)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT paramless_vs_param_time(42)
+-- !query schema
+struct<paramless_vs_param_time(42):string>
+-- !query output
+time(6)
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION paramless_vs_param_grouping(grouping__id INT)
+RETURNS INT RETURN grouping__id
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "UNSUPPORTED_GROUPING_EXPRESSION",
+  "sqlState" : "42K0E",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 12,
+    "fragment" : "grouping__id"
+  } ]
+}
+
+
+-- !query
+SELECT paramless_vs_param_grouping(42)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_ROUTINE",
+  "sqlState" : "42883",
+  "messageParameters" : {
+    "routineName" : "`paramless_vs_param_grouping`",
+    "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 38,
+    "fragment" : "paramless_vs_param_grouping(42)"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION lca_vs_param(x INT)
+RETURNS INT RETURN (SELECT y FROM (SELECT 999 AS x, x + 1 AS y))
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT lca_vs_param(42)
+-- !query schema
+struct<lca_vs_param(42):int>
+-- !query output
+1000
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_vs_param(x INT)
+RETURNS INT RETURN (SELECT (SELECT x) FROM v1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT outer_vs_param(42)
+-- !query schema
+struct<outer_vs_param(42):int>
+-- !query output
+1
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_param_pure(x INT)
+RETURNS INT RETURN (SELECT (SELECT x))
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT outer_param_pure(42)
+-- !query schema
+struct<outer_param_pure(42):int>
+-- !query output
+42
+
+
+-- !query
+DECLARE x = 999
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT identity_fn(42)
+-- !query schema
+struct<identity_fn(42):int>
+-- !query output
+42
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION inner_fn(y INT) RETURNS INT RETURN x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION outer_fn(x INT) RETURNS INT RETURN inner_fn(x)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT outer_fn(42)
+-- !query schema
+struct<outer_fn(42):int>
+-- !query output
+999
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION tvf_paramless_vs_param(current_user STRING)
+RETURNS TABLE(c STRING) RETURN SELECT current_user AS c
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT c = current_user() AS function_won FROM tvf_paramless_vs_param('should_be_ignored')
+-- !query schema
+struct<function_won:boolean>
+-- !query output
+true
+
+
+-- !query
+DROP TEMPORARY VARIABLE x
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP VIEW v1
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION identity_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION col_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_date
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_time
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION paramless_vs_param_grouping
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException
+{
+  "errorClass" : "ROUTINE_NOT_FOUND",
+  "sqlState" : "42883",
+  "messageParameters" : {
+    "routineName" : "`paramless_vs_param_grouping`"
+  }
+}
+
+
+-- !query
+DROP TEMPORARY FUNCTION lca_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_vs_param
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_param_pure
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION inner_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION outer_fn
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TEMPORARY FUNCTION tvf_paramless_vs_param
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
index a5e7965c10f40..b227fd3c9475c 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
@@ -823,7 +823,9 @@ org.apache.spark.SparkException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 14,
+    "fragment" : "foo51()"
   } ]
 }
 
@@ -956,7 +958,9 @@ org.apache.spark.SparkRuntimeException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 24,
+    "fragment" : "foo9a('Nonsense')"
   } ]
 }
 
@@ -1209,7 +1213,9 @@ org.apache.spark.SparkArithmeticException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 17,
+    "fragment" : "foo9f(999)"
   } ]
 }
 
@@ -1232,7 +1238,9 @@ org.apache.spark.SparkArithmeticException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 21,
+    "fragment" : "foo9f(999 + 1)"
   } ]
 }
 
@@ -1271,7 +1279,9 @@ org.apache.spark.SparkNumberFormatException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 26,
+    "fragment" : "foo9g('hello', '7')"
   } ]
 }
 
@@ -1294,7 +1304,9 @@ org.apache.spark.SparkNumberFormatException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 25,
+    "fragment" : "foo9g(123.23, 'q')"
   } ]
 }
 
@@ -1333,7 +1345,9 @@ org.apache.spark.SparkNumberFormatException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 26,
+    "fragment" : "foo9h('hello', '7')"
   } ]
 }
 
@@ -1356,7 +1370,9 @@ org.apache.spark.SparkNumberFormatException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 25,
+    "fragment" : "foo9h(123.23, 'q')"
   } ]
 }
 
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index 11ca99bc23047..e9cc3242225a4 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -574,7 +574,7 @@ struct<result:string>
 -- !query
 SELECT ST_AsBinary(ST_GeogFromWKB(NULL))
 -- !query schema
-struct<st_asbinary(st_geogfromwkb(NULL)):binary>
+struct<st_asbinary(st_geogfromwkb(NULL), NDR):binary>
 -- !query output
 NULL
 
@@ -582,7 +582,7 @@ NULL
 -- !query
 SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')))
 -- !query schema
-struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))):string>
+struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), NDR)):string>
 -- !query output
 0101000000000000000000F03F0000000000000040
 
@@ -622,7 +622,7 @@ struct<count(1):bigint>
 -- !query
 SELECT ST_AsBinary(ST_GeomFromWKB(NULL))
 -- !query schema
-struct<st_asbinary(st_geomfromwkb(NULL, 0)):binary>
+struct<st_asbinary(st_geomfromwkb(NULL, 0), NDR):binary>
 -- !query output
 NULL
 
@@ -630,7 +630,7 @@ NULL
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040')))
 -- !query schema
-struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0))):string>
+struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR)):string>
 -- !query output
 0101000000000000000000F03F0000000000000040
 
@@ -638,7 +638,7 @@ struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F00000000000000
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 0)))
 -- !query schema
-struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0))):string>
+struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR)):string>
 -- !query output
 0101000000000000000000F03F0000000000000040
 
@@ -646,7 +646,7 @@ struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F00000000000000
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 3857)))
 -- !query schema
-struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 3857))):string>
+struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 3857), NDR)):string>
 -- !query output
 0101000000000000000000F03F0000000000000040
 
@@ -654,7 +654,7 @@ struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F00000000000000
 -- !query
 SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 4326)))
 -- !query schema
-struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 4326))):string>
+struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 4326), NDR)):string>
 -- !query output
 0101000000000000000000F03F0000000000000040
 
@@ -768,6 +768,200 @@ org.apache.spark.SparkIllegalArgumentException
 }
 
 
+-- !query
+SELECT ST_AsBinary(NULL)
+-- !query schema
+struct<st_asbinary(NULL, NDR):binary>
+-- !query output
+NULL
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')))
+-- !query schema
+struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), NDR)):string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'00000000013FF00000000000004000000000000000'), 'NDR'))
+-- !query schema
+struct<hex(st_asbinary(st_geogfromwkb(X'00000000013FF00000000000004000000000000000'), NDR)):string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'))
+-- !query schema
+struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'), XDR)):string>
+-- !query output
+00000000013FF00000000000004000000000000000
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040')))
+-- !query schema
+struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), NDR)):string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'00000000013FF00000000000004000000000000000'), 'NDR'))
+-- !query schema
+struct<hex(st_asbinary(st_geomfromwkb(X'00000000013FF00000000000004000000000000000', 0), NDR)):string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR'))
+-- !query schema
+struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0), XDR)):string>
+-- !query output
+00000000013FF00000000000004000000000000000
+
+
+-- !query
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), '')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "ST_INVALID_ENDIANNESS_VALUE",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "endianness" : ""
+  }
+}
+
+
+-- !query
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'ABC')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "ST_INVALID_ENDIANNESS_VALUE",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "endianness" : "ABC"
+  }
+}
+
+
+-- !query
+SELECT ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'), 'big-endian')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "ST_INVALID_ENDIANNESS_VALUE",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "endianness" : "big-endian"
+  }
+}
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb)) <> wkb
+-- !query schema
+struct<count(1):bigint>
+-- !query output
+0
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'NDR') <> wkb
+-- !query schema
+struct<count(1):bigint>
+-- !query output
+0
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'XDR') = wkb
+-- !query schema
+struct<count(1):bigint>
+-- !query output
+0
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb)) <> wkb
+-- !query schema
+struct<count(1):bigint>
+-- !query output
+0
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb), 'NDR') <> wkb
+-- !query schema
+struct<count(1):bigint>
+-- !query output
+0
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeomFromWKB(wkb), 'XDR') = wkb
+-- !query schema
+struct<count(1):bigint>
+-- !query output
+0
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), '') IS NOT NULL
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "ST_INVALID_ENDIANNESS_VALUE",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "endianness" : ""
+  }
+}
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'ABC') IS NOT NULL
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "ST_INVALID_ENDIANNESS_VALUE",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "endianness" : "ABC"
+  }
+}
+
+
+-- !query
+SELECT COUNT(*) FROM geodata WHERE ST_AsBinary(ST_GeogFromWKB(wkb), 'big-endian') IS NOT NULL
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "ST_INVALID_ENDIANNESS_VALUE",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "endianness" : "big-endian"
+  }
+}
+
+
 -- !query
 SELECT ST_Srid(NULL)
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 85bd9137602a3..14a89975a8571 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -607,3 +607,44 @@ struct<c1:int,c2:int,scalarsubquery(c1):bigint>
 -- !query output
 0	1	NULL
 1	2	NULL
+
+
+-- !query
+SELECT (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct<scalarsubquery(col1):int>
+-- !query output
+1
+
+
+-- !query
+SELECT (SELECT t1.s.* FROM VALUES(2) AS t2(col1) LIMIT 1)
+FROM (SELECT named_struct('a', 1) AS s) AS t1
+-- !query schema
+struct<scalarsubquery(s):int>
+-- !query output
+1
+
+
+-- !query
+SELECT (SELECT * FROM VALUES(2) AS t2(col1) LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct<scalarsubquery():int>
+-- !query output
+2
+
+
+-- !query
+SELECT (SELECT t1.* FROM (SELECT 3 AS col1) AS t1 LIMIT 1) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct<scalarsubquery():int>
+-- !query output
+3
+
+
+-- !query
+SELECT (SELECT * FROM (SELECT t1.* FROM VALUES(2) AS t2(col1) LIMIT 1)) FROM VALUES(1) AS t1(col1)
+-- !query schema
+struct<scalarsubquery(col1):int>
+-- !query output
+1
diff --git a/sql/core/src/test/resources/sql-tests/results/time-bucket.sql.out b/sql/core/src/test/resources/sql-tests/results/time-bucket.sql.out
new file mode 100644
index 0000000000000..e1f79ff43e4b3
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/time-bucket.sql.out
@@ -0,0 +1,1080 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SET TIME ZONE 'UTC'
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.session.timeZone	UTC
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '00' SECOND",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '00' SECOND, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 72,
+    "fragment" : "time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '0' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 71,
+    "fragment" : "time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '0' YEAR",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 70,
+    "fragment" : "time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-15' MINUTE",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 74,
+    "fragment" : "time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-1' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 72,
+    "fragment" : "time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-1' YEAR",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 71,
+    "fragment" : "time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '00' MINUTE",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '15' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 96,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-10' MINUTE",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '05' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 95,
+    "fragment" : "time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '0' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '3' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 92,
+    "fragment" : "time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "currentValue" : "INTERVAL '-2' MONTH",
+    "exprName" : "`bucketSize`",
+    "sqlExpr" : "\"time_bucket((INTERVAL '1' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"",
+    "valueRange" : "(0, inf)"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 92,
+    "fragment" : "time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"TIMESTAMP_NTZ '2024-01-01 00:00:00'\"",
+    "inputType" : "\"TIMESTAMP_NTZ\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 107,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"TIMESTAMP '2024-01-01 00:00:00'\"",
+    "inputType" : "\"TIMESTAMP\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"TIMESTAMP_NTZ\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 107,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"15 minutes\"",
+    "inputType" : "\"STRING\"",
+    "paramIndex" : "first",
+    "requiredType" : "(\"INTERVAL DAY TO SECOND\" or \"INTERVAL YEAR TO MONTH\")",
+    "sqlExpr" : "\"time_bucket(15 minutes, TIMESTAMP '2024-01-15 10:23:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 65,
+    "fragment" : "time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"DATE '2024-01-15'\"",
+    "inputType" : "\"DATE\"",
+    "paramIndex" : "second",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 59,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"2024-01-15 10:23:00\"",
+    "inputType" : "\"STRING\"",
+    "paramIndex" : "second",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, 2024-01-15 10:23:00, TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 63,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"DATE '2024-01-01'\"",
+    "inputType" : "\"DATE\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 92,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"2024-01-01 00:00:00\"",
+    "inputType" : "\"STRING\"",
+    "paramIndex" : "third",
+    "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', 2024-01-01 00:00:00)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 96,
+    "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' HOUR) tab(bs)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"bs\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL HOUR\"",
+    "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 55,
+    "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' MONTH) tab(bs)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"bs\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL MONTH\"",
+    "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 55,
+    "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"scalarsubquery()\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL HOUR\"",
+    "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 79,
+    "fragment" : "time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"scalarsubquery()\"",
+    "inputName" : "`bucketSize`",
+    "inputType" : "\"INTERVAL MONTH\"",
+    "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 80,
+    "fragment" : "time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"o\"",
+    "inputName" : "`origin`",
+    "inputType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 73,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"o\"",
+    "inputName" : "`origin`",
+    "inputType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 74,
+    "fragment" : "time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputExpr" : "\"scalarsubquery()\"",
+    "inputName" : "`origin`",
+    "inputType" : "\"TIMESTAMP\"",
+    "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', scalarsubquery())\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 112,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'))"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "1",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "[2, 3]",
+    "functionName" : "`time_bucket`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 37,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR)"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "4",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "[2, 3]",
+    "functionName" : "`time_bucket`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 136,
+    "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00')"
+  } ]
+}
+
+
+-- !query
+SELECT time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00')
+-- !query schema
+struct<time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+NULL
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL)
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, NULL, TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+NULL
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL)
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL):timestamp>
+-- !query output
+NULL
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, NULL)
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, NULL, NULL):timestamp>
+-- !query output
+NULL
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00'):timestamp>
+-- !query output
+NULL
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00'):timestamp_ntz>
+-- !query output
+NULL
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-01-01 11:15:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-01-01 11:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-01-04 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-06-20 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '0.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456')
+-- !query schema
+struct<time_bucket(INTERVAL '00.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-06-20 10:00:00.123456
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP_NTZ '1970-01-01 00:00:00'):timestamp_ntz>
+-- !query output
+2024-01-01 11:15:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00'):timestamp>
+-- !query output
+2024-01-01 11:05:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00')
+-- !query schema
+struct<time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-01-01 11:15:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00'):timestamp>
+-- !query output
+2024-01-01 11:27:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00'):timestamp>
+-- !query output
+2024-01-01 10:30:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00'):timestamp_ntz>
+-- !query output
+2024-01-15 09:30:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-03-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-04-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2024-05-15 10:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2024-05-15 10:00:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-01-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2023-10-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00', TIMESTAMP_NTZ '1970-01-01 00:00:00'):timestamp_ntz>
+-- !query output
+2024-03-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-03-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00'):timestamp>
+-- !query output
+2024-03-15 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 10:00:00', TIMESTAMP '2024-03-15 10:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 10:00:00', TIMESTAMP '2024-03-15 10:00:00'):timestamp>
+-- !query output
+2024-03-15 10:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00'):timestamp>
+-- !query output
+2024-02-29 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00'):timestamp>
+-- !query output
+2025-02-28 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00'):timestamp>
+-- !query output
+2024-02-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00'):timestamp_ntz>
+-- !query output
+2024-07-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+1969-12-31 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '1969-12-31 23:30:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '1969-12-31 23:30:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+1969-12-31 23:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00')
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00'):timestamp>
+-- !query output
+2024-01-01 10:30:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00')
+-- !query schema
+struct<time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+1968-07-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '10' MINUTE + INTERVAL '5' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<time_bucket((INTERVAL '10' MINUTE + INTERVAL '05' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-06-20 09:45:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<time_bucket((INTERVAL '01' HOUR - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-06-20 09:45:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '2' MONTH + INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00')
+-- !query schema
+struct<time_bucket((INTERVAL '2' MONTH + INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-04-01 00:00:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '5' MINUTE)
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '05' MINUTE):timestamp>
+-- !query output
+2024-06-20 09:05:00
+
+
+-- !query
+SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE)
+-- !query schema
+struct<time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE, TIMESTAMP '1970-01-01 00:00:00'):timestamp>
+-- !query output
+2024-06-20 10:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' HOUR, t) AS bucket
+  FROM VALUES (TIMESTAMP '2024-01-15 10:23:00'), (TIMESTAMP '2024-01-15 14:45:00'), (CAST(NULL AS TIMESTAMP)) tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp,bucket:timestamp>
+-- !query output
+NULL	NULL
+2024-01-15 10:23:00	2024-01-15 10:00:00
+2024-01-15 14:45:00	2024-01-15 14:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '15' MINUTE, t) AS bucket
+  FROM VALUES (TIMESTAMP_NTZ '2024-01-15 10:23:00'), (TIMESTAMP_NTZ '2024-01-15 14:07:00') tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp_ntz,bucket:timestamp_ntz>
+-- !query output
+2024-01-15 10:23:00	2024-01-15 10:15:00
+2024-01-15 14:07:00	2024-01-15 14:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket
+  FROM VALUES (TIMESTAMP '2024-03-15 10:23:00'), (TIMESTAMP '2024-06-01 00:00:00') tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp,bucket:timestamp>
+-- !query output
+2024-03-15 10:23:00	2024-03-01 00:00:00
+2024-06-01 00:00:00	2024-06-01 00:00:00
+
+
+-- !query
+SET TIME ZONE 'America/Los_Angeles'
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.session.timeZone	America/Los_Angeles
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-02-15 10:00:00'),
+    (TIMESTAMP '2024-03-15 10:00:00'),
+    (TIMESTAMP '2024-04-15 10:00:00') tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp,bucket:timestamp>
+-- !query output
+2024-02-15 10:00:00	2024-02-01 00:00:00
+2024-03-15 10:00:00	2024-03-01 00:00:00
+2024-04-15 10:00:00	2024-04-01 00:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' MONTH, CAST(t AS TIMESTAMP_NTZ)) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-02-15 10:00:00'),
+    (TIMESTAMP '2024-03-15 10:00:00'),
+    (TIMESTAMP '2024-04-15 10:00:00') tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp,bucket:timestamp_ntz>
+-- !query output
+2024-02-15 10:00:00	2024-02-01 00:00:00
+2024-03-15 10:00:00	2024-03-01 00:00:00
+2024-04-15 10:00:00	2024-04-01 00:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' DAY, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-03-09 12:00:00'),
+    (TIMESTAMP '2024-03-10 12:00:00'),
+    (TIMESTAMP '2024-03-11 12:00:00') tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp,bucket:timestamp>
+-- !query output
+2024-03-09 12:00:00	2024-03-09 00:00:00
+2024-03-10 12:00:00	2024-03-10 00:00:00
+2024-03-11 12:00:00	2024-03-11 00:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '1' DAY, t) AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-11-02 12:00:00'),
+    (TIMESTAMP '2024-11-03 12:00:00'),
+    (TIMESTAMP '2024-11-04 12:00:00') tab(t)
+  ORDER BY t
+-- !query schema
+struct<t:timestamp,bucket:timestamp>
+-- !query output
+2024-11-02 12:00:00	2024-11-02 00:00:00
+2024-11-03 12:00:00	2024-11-03 00:00:00
+2024-11-04 12:00:00	2024-11-04 00:00:00
+
+
+-- !query
+SELECT t, time_bucket(INTERVAL '36' HOUR, t, TIMESTAMP '2024-11-01 00:00:00') AS bucket
+  FROM VALUES
+    (TIMESTAMP '2024-11-05 11:30:00') tab(t)
+-- !query schema
+struct<t:timestamp,bucket:timestamp>
+-- !query output
+2024-11-05 11:30:00	2024-11-04 00:00:00
diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out
index bb75fe5991acf..f9c32cfa7fab5 100644
--- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out
@@ -263,6 +263,30 @@ struct<length(1996-09-10 10:11:12.4):int>
 21
 
 
+-- !query
+SELECT '12:00:00' = TIME'12:00:00' FROM t
+-- !query schema
+struct<(12:00:00 = TIME '12:00:00'):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT '12:00:01' > TIME'12:00:00' FROM t
+-- !query schema
+struct<(12:00:01 > TIME '12:00:00'):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT time_trunc('HOUR', '12:34:56') FROM t
+-- !query schema
+struct<time_trunc(HOUR, 12:34:56):time(6)>
+-- !query output
+12:00:00
+
+
 -- !query
 SELECT year( '1996-01-10') FROM t
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out
index 497e307f592fb..7410e7eaafd6f 100644
--- a/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out
@@ -897,8 +897,8 @@ DESCRIBE EXTENDED v
 -- !query schema
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
-c1                  	bigint              	c1 6d               
-c2                  	string              	c2 6d               
+c1                  	bigint              	c1 6e               
+c2                  	string              	c2 6e               
                     	                    	                    
 # Detailed Table Information	                    	                    
 Catalog             	spark_catalog       	                    
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt
index d088cb0190d7a..2572fe57402cd 100644
--- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt
+++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt
@@ -1,12 +1,12 @@
 == Physical Plan ==
-TakeOrderedAndProject (129)
-+- * HashAggregate (128)
-   +- Exchange (127)
-      +- * HashAggregate (126)
-         +- Union (125)
-            :- * HashAggregate (82)
-            :  +- * HashAggregate (81)
-            :     +- Union (80)
+TakeOrderedAndProject (123)
++- * HashAggregate (122)
+   +- Exchange (121)
+      +- * HashAggregate (120)
+         +- Union (119)
+            :- * HashAggregate (76)
+            :  +- * HashAggregate (75)
+            :     +- Union (74)
             :        :- * Project (30)
             :        :  +- * BroadcastHashJoin LeftOuter BuildRight (29)
             :        :     :- * HashAggregate (15)
@@ -37,8 +37,8 @@ TakeOrderedAndProject (129)
             :        :                       :     :     +- Scan parquet spark_catalog.default.store_returns (16)
             :        :                       :     +- ReusedExchange (19)
             :        :                       +- ReusedExchange (22)
-            :        :- * Project (49)
-            :        :  +- * BroadcastNestedLoopJoin Inner BuildRight (48)
+            :        :- * Project (43)
+            :        :  +- * BroadcastNestedLoopJoin Inner BuildRight (42)
             :        :     :- * HashAggregate (38)
             :        :     :  +- Exchange (37)
             :        :     :     +- * HashAggregate (36)
@@ -47,87 +47,81 @@ TakeOrderedAndProject (129)
             :        :     :              :- * ColumnarToRow (32)
             :        :     :              :  +- Scan parquet spark_catalog.default.catalog_sales (31)
             :        :     :              +- ReusedExchange (33)
-            :        :     +- BroadcastExchange (47)
-            :        :        +- * HashAggregate (46)
-            :        :           +- Exchange (45)
-            :        :              +- * HashAggregate (44)
-            :        :                 +- * Project (43)
-            :        :                    +- * BroadcastHashJoin Inner BuildRight (42)
-            :        :                       :- * ColumnarToRow (40)
-            :        :                       :  +- Scan parquet spark_catalog.default.catalog_returns (39)
-            :        :                       +- ReusedExchange (41)
-            :        +- * Project (79)
-            :           +- * BroadcastHashJoin LeftOuter BuildRight (78)
-            :              :- * HashAggregate (64)
-            :              :  +- Exchange (63)
-            :              :     +- * HashAggregate (62)
-            :              :        +- * Project (61)
-            :              :           +- * BroadcastHashJoin Inner BuildRight (60)
-            :              :              :- * Project (55)
-            :              :              :  +- * BroadcastHashJoin Inner BuildRight (54)
-            :              :              :     :- * Filter (52)
-            :              :              :     :  +- * ColumnarToRow (51)
-            :              :              :     :     +- Scan parquet spark_catalog.default.web_sales (50)
-            :              :              :     +- ReusedExchange (53)
-            :              :              +- BroadcastExchange (59)
-            :              :                 +- * Filter (58)
-            :              :                    +- * ColumnarToRow (57)
-            :              :                       +- Scan parquet spark_catalog.default.web_page (56)
-            :              +- BroadcastExchange (77)
-            :                 +- * HashAggregate (76)
-            :                    +- Exchange (75)
-            :                       +- * HashAggregate (74)
-            :                          +- * Project (73)
-            :                             +- * BroadcastHashJoin Inner BuildRight (72)
-            :                                :- * Project (70)
-            :                                :  +- * BroadcastHashJoin Inner BuildRight (69)
-            :                                :     :- * Filter (67)
-            :                                :     :  +- * ColumnarToRow (66)
-            :                                :     :     +- Scan parquet spark_catalog.default.web_returns (65)
-            :                                :     +- ReusedExchange (68)
-            :                                +- ReusedExchange (71)
-            :- * HashAggregate (103)
-            :  +- Exchange (102)
-            :     +- * HashAggregate (101)
-            :        +- * HashAggregate (100)
-            :           +- * HashAggregate (99)
-            :              +- Union (98)
-            :                 :- * Project (87)
-            :                 :  +- * BroadcastHashJoin LeftOuter BuildRight (86)
-            :                 :     :- * HashAggregate (84)
-            :                 :     :  +- ReusedExchange (83)
-            :                 :     +- ReusedExchange (85)
-            :                 :- * Project (92)
-            :                 :  +- * BroadcastNestedLoopJoin Inner BuildRight (91)
-            :                 :     :- * HashAggregate (89)
-            :                 :     :  +- ReusedExchange (88)
-            :                 :     +- ReusedExchange (90)
-            :                 +- * Project (97)
-            :                    +- * BroadcastHashJoin LeftOuter BuildRight (96)
-            :                       :- * HashAggregate (94)
-            :                       :  +- ReusedExchange (93)
-            :                       +- ReusedExchange (95)
-            +- * HashAggregate (124)
-               +- Exchange (123)
-                  +- * HashAggregate (122)
-                     +- * HashAggregate (121)
-                        +- * HashAggregate (120)
-                           +- Union (119)
-                              :- * Project (108)
-                              :  +- * BroadcastHashJoin LeftOuter BuildRight (107)
-                              :     :- * HashAggregate (105)
-                              :     :  +- ReusedExchange (104)
-                              :     +- ReusedExchange (106)
-                              :- * Project (113)
-                              :  +- * BroadcastNestedLoopJoin Inner BuildRight (112)
-                              :     :- * HashAggregate (110)
-                              :     :  +- ReusedExchange (109)
-                              :     +- ReusedExchange (111)
-                              +- * Project (118)
-                                 +- * BroadcastHashJoin LeftOuter BuildRight (117)
-                                    :- * HashAggregate (115)
-                                    :  +- ReusedExchange (114)
-                                    +- ReusedExchange (116)
+            :        :     +- BroadcastExchange (41)
+            :        :        +- * Project (40)
+            :        :           +- * Scan OneRowRelation (39)
+            :        +- * Project (73)
+            :           +- * BroadcastHashJoin LeftOuter BuildRight (72)
+            :              :- * HashAggregate (58)
+            :              :  +- Exchange (57)
+            :              :     +- * HashAggregate (56)
+            :              :        +- * Project (55)
+            :              :           +- * BroadcastHashJoin Inner BuildRight (54)
+            :              :              :- * Project (49)
+            :              :              :  +- * BroadcastHashJoin Inner BuildRight (48)
+            :              :              :     :- * Filter (46)
+            :              :              :     :  +- * ColumnarToRow (45)
+            :              :              :     :     +- Scan parquet spark_catalog.default.web_sales (44)
+            :              :              :     +- ReusedExchange (47)
+            :              :              +- BroadcastExchange (53)
+            :              :                 +- * Filter (52)
+            :              :                    +- * ColumnarToRow (51)
+            :              :                       +- Scan parquet spark_catalog.default.web_page (50)
+            :              +- BroadcastExchange (71)
+            :                 +- * HashAggregate (70)
+            :                    +- Exchange (69)
+            :                       +- * HashAggregate (68)
+            :                          +- * Project (67)
+            :                             +- * BroadcastHashJoin Inner BuildRight (66)
+            :                                :- * Project (64)
+            :                                :  +- * BroadcastHashJoin Inner BuildRight (63)
+            :                                :     :- * Filter (61)
+            :                                :     :  +- * ColumnarToRow (60)
+            :                                :     :     +- Scan parquet spark_catalog.default.web_returns (59)
+            :                                :     +- ReusedExchange (62)
+            :                                +- ReusedExchange (65)
+            :- * HashAggregate (97)
+            :  +- Exchange (96)
+            :     +- * HashAggregate (95)
+            :        +- * HashAggregate (94)
+            :           +- * HashAggregate (93)
+            :              +- Union (92)
+            :                 :- * Project (81)
+            :                 :  +- * BroadcastHashJoin LeftOuter BuildRight (80)
+            :                 :     :- * HashAggregate (78)
+            :                 :     :  +- ReusedExchange (77)
+            :                 :     +- ReusedExchange (79)
+            :                 :- * Project (86)
+            :                 :  +- * BroadcastNestedLoopJoin Inner BuildRight (85)
+            :                 :     :- * HashAggregate (83)
+            :                 :     :  +- ReusedExchange (82)
+            :                 :     +- ReusedExchange (84)
+            :                 +- * Project (91)
+            :                    +- * BroadcastHashJoin LeftOuter BuildRight (90)
+            :                       :- * HashAggregate (88)
+            :                       :  +- ReusedExchange (87)
+            :                       +- ReusedExchange (89)
+            +- * HashAggregate (118)
+               +- Exchange (117)
+                  +- * HashAggregate (116)
+                     +- * HashAggregate (115)
+                        +- * HashAggregate (114)
+                           +- Union (113)
+                              :- * Project (102)
+                              :  +- * BroadcastHashJoin LeftOuter BuildRight (101)
+                              :     :- * HashAggregate (99)
+                              :     :  +- ReusedExchange (98)
+                              :     +- ReusedExchange (100)
+                              :- * Project (107)
+                              :  +- * BroadcastNestedLoopJoin Inner BuildRight (106)
+                              :     :- * HashAggregate (104)
+                              :     :  +- ReusedExchange (103)
+                              :     +- ReusedExchange (105)
+                              +- * Project (112)
+                                 +- * BroadcastHashJoin LeftOuter BuildRight (111)
+                                    :- * HashAggregate (109)
+                                    :  +- ReusedExchange (108)
+                                    +- ReusedExchange (110)
 
 
 (1) Scan parquet spark_catalog.default.store_sales
@@ -145,7 +139,7 @@ Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_s
 Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4]
 Condition : isnotnull(ss_store_sk#1)
 
-(4) ReusedExchange [Reuses operator id: 134]
+(4) ReusedExchange [Reuses operator id: 128]
 Output [1]: [d_date_sk#6]
 
 (5) BroadcastHashJoin [codegen id : 3]
@@ -219,7 +213,7 @@ Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_s
 Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19]
 Condition : isnotnull(sr_store_sk#16)
 
-(19) ReusedExchange [Reuses operator id: 134]
+(19) ReusedExchange [Reuses operator id: 128]
 Output [1]: [d_date_sk#20]
 
 (20) BroadcastHashJoin [codegen id : 6]
@@ -287,7 +281,7 @@ ReadSchema: struct<cs_call_center_sk:int,cs_ext_sales_price:decimal(7,2),cs_net_
 (32) ColumnarToRow [codegen id : 10]
 Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37]
 
-(33) ReusedExchange [Reuses operator id: 134]
+(33) ReusedExchange [Reuses operator id: 128]
 Output [1]: [d_date_sk#38]
 
 (34) BroadcastHashJoin [codegen id : 10]
@@ -311,484 +305,450 @@ Results [3]: [cs_call_center_sk#34, sum#41, sum#42]
 Input [3]: [cs_call_center_sk#34, sum#41, sum#42]
 Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5]
 
-(38) HashAggregate [codegen id : 14]
+(38) HashAggregate [codegen id : 12]
 Input [3]: [cs_call_center_sk#34, sum#41, sum#42]
 Keys [1]: [cs_call_center_sk#34]
 Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))]
 Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44]
 Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46]
 
-(39) Scan parquet spark_catalog.default.catalog_returns
-Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49]
-Batched: true
-Location: InMemoryFileIndex []
-PartitionFilters: [isnotnull(cr_returned_date_sk#49), dynamicpruningexpression(cr_returned_date_sk#49 IN dynamicpruning#5)]
-ReadSchema: struct<cr_return_amount:decimal(7,2),cr_net_loss:decimal(7,2)>
-
-(40) ColumnarToRow [codegen id : 12]
-Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49]
-
-(41) ReusedExchange [Reuses operator id: 134]
-Output [1]: [d_date_sk#50]
-
-(42) BroadcastHashJoin [codegen id : 12]
-Left keys [1]: [cr_returned_date_sk#49]
-Right keys [1]: [d_date_sk#50]
-Join type: Inner
-Join condition: None
-
-(43) Project [codegen id : 12]
-Output [2]: [cr_return_amount#47, cr_net_loss#48]
-Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50]
-
-(44) HashAggregate [codegen id : 12]
-Input [2]: [cr_return_amount#47, cr_net_loss#48]
-Keys: []
-Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))]
-Aggregate Attributes [2]: [sum#51, sum#52]
-Results [2]: [sum#53, sum#54]
+(39) Scan OneRowRelation [codegen id : 11]
+Output: []
 
-(45) Exchange
-Input [2]: [sum#53, sum#54]
-Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6]
+(40) Project [codegen id : 11]
+Output [2]: [Subquery scalar-subquery#47, [id=#6].returns AS returns#48, ReusedSubquery Subquery scalar-subquery#47, [id=#6].profit_loss AS profit_loss#49]
+Input: []
 
-(46) HashAggregate [codegen id : 13]
-Input [2]: [sum#53, sum#54]
-Keys: []
-Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))]
-Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56]
-Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58]
-
-(47) BroadcastExchange
-Input [2]: [returns#57, profit_loss#58]
+(41) BroadcastExchange
+Input [2]: [returns#48, profit_loss#49]
 Arguments: IdentityBroadcastMode, [plan_id=7]
 
-(48) BroadcastNestedLoopJoin [codegen id : 14]
+(42) BroadcastNestedLoopJoin [codegen id : 12]
 Join type: Inner
 Join condition: None
 
-(49) Project [codegen id : 14]
-Output [5]: [catalog channel AS channel#59, cs_call_center_sk#34 AS id#60, sales#45, returns#57, (profit#46 - profit_loss#58) AS profit#61]
-Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58]
+(43) Project [codegen id : 12]
+Output [5]: [catalog channel AS channel#50, cs_call_center_sk#34 AS id#51, sales#45, returns#48, (profit#46 - profit_loss#49) AS profit#52]
+Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#48, profit_loss#49]
 
-(50) Scan parquet spark_catalog.default.web_sales
-Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65]
+(44) Scan parquet spark_catalog.default.web_sales
+Output [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56]
 Batched: true
 Location: InMemoryFileIndex []
-PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#5)]
+PartitionFilters: [isnotnull(ws_sold_date_sk#56), dynamicpruningexpression(ws_sold_date_sk#56 IN dynamicpruning#5)]
 PushedFilters: [IsNotNull(ws_web_page_sk)]
 ReadSchema: struct<ws_web_page_sk:int,ws_ext_sales_price:decimal(7,2),ws_net_profit:decimal(7,2)>
 
-(51) ColumnarToRow [codegen id : 17]
-Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65]
+(45) ColumnarToRow [codegen id : 15]
+Input [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56]
 
-(52) Filter [codegen id : 17]
-Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65]
-Condition : isnotnull(ws_web_page_sk#62)
+(46) Filter [codegen id : 15]
+Input [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56]
+Condition : isnotnull(ws_web_page_sk#53)
 
-(53) ReusedExchange [Reuses operator id: 134]
-Output [1]: [d_date_sk#66]
+(47) ReusedExchange [Reuses operator id: 128]
+Output [1]: [d_date_sk#57]
 
-(54) BroadcastHashJoin [codegen id : 17]
-Left keys [1]: [ws_sold_date_sk#65]
-Right keys [1]: [d_date_sk#66]
+(48) BroadcastHashJoin [codegen id : 15]
+Left keys [1]: [ws_sold_date_sk#56]
+Right keys [1]: [d_date_sk#57]
 Join type: Inner
 Join condition: None
 
-(55) Project [codegen id : 17]
-Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64]
-Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66]
+(49) Project [codegen id : 15]
+Output [3]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55]
+Input [5]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56, d_date_sk#57]
 
-(56) Scan parquet spark_catalog.default.web_page
-Output [1]: [wp_web_page_sk#67]
+(50) Scan parquet spark_catalog.default.web_page
+Output [1]: [wp_web_page_sk#58]
 Batched: true
 Location [not included in comparison]/{warehouse_dir}/web_page]
 PushedFilters: [IsNotNull(wp_web_page_sk)]
 ReadSchema: struct<wp_web_page_sk:int>
 
-(57) ColumnarToRow [codegen id : 16]
-Input [1]: [wp_web_page_sk#67]
+(51) ColumnarToRow [codegen id : 14]
+Input [1]: [wp_web_page_sk#58]
 
-(58) Filter [codegen id : 16]
-Input [1]: [wp_web_page_sk#67]
-Condition : isnotnull(wp_web_page_sk#67)
+(52) Filter [codegen id : 14]
+Input [1]: [wp_web_page_sk#58]
+Condition : isnotnull(wp_web_page_sk#58)
 
-(59) BroadcastExchange
-Input [1]: [wp_web_page_sk#67]
+(53) BroadcastExchange
+Input [1]: [wp_web_page_sk#58]
 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8]
 
-(60) BroadcastHashJoin [codegen id : 17]
-Left keys [1]: [ws_web_page_sk#62]
-Right keys [1]: [wp_web_page_sk#67]
+(54) BroadcastHashJoin [codegen id : 15]
+Left keys [1]: [ws_web_page_sk#53]
+Right keys [1]: [wp_web_page_sk#58]
 Join type: Inner
 Join condition: None
 
-(61) Project [codegen id : 17]
-Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67]
-Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67]
-
-(62) HashAggregate [codegen id : 17]
-Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67]
-Keys [1]: [wp_web_page_sk#67]
-Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))]
-Aggregate Attributes [2]: [sum#68, sum#69]
-Results [3]: [wp_web_page_sk#67, sum#70, sum#71]
-
-(63) Exchange
-Input [3]: [wp_web_page_sk#67, sum#70, sum#71]
-Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=9]
-
-(64) HashAggregate [codegen id : 22]
-Input [3]: [wp_web_page_sk#67, sum#70, sum#71]
-Keys [1]: [wp_web_page_sk#67]
-Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))]
-Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73]
-Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75]
-
-(65) Scan parquet spark_catalog.default.web_returns
-Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79]
+(55) Project [codegen id : 15]
+Output [3]: [ws_ext_sales_price#54, ws_net_profit#55, wp_web_page_sk#58]
+Input [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, wp_web_page_sk#58]
+
+(56) HashAggregate [codegen id : 15]
+Input [3]: [ws_ext_sales_price#54, ws_net_profit#55, wp_web_page_sk#58]
+Keys [1]: [wp_web_page_sk#58]
+Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#54)), partial_sum(UnscaledValue(ws_net_profit#55))]
+Aggregate Attributes [2]: [sum#59, sum#60]
+Results [3]: [wp_web_page_sk#58, sum#61, sum#62]
+
+(57) Exchange
+Input [3]: [wp_web_page_sk#58, sum#61, sum#62]
+Arguments: hashpartitioning(wp_web_page_sk#58, 5), ENSURE_REQUIREMENTS, [plan_id=9]
+
+(58) HashAggregate [codegen id : 20]
+Input [3]: [wp_web_page_sk#58, sum#61, sum#62]
+Keys [1]: [wp_web_page_sk#58]
+Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#54)), sum(UnscaledValue(ws_net_profit#55))]
+Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#54))#63, sum(UnscaledValue(ws_net_profit#55))#64]
+Results [3]: [wp_web_page_sk#58, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#54))#63,17,2) AS sales#65, MakeDecimal(sum(UnscaledValue(ws_net_profit#55))#64,17,2) AS profit#66]
+
+(59) Scan parquet spark_catalog.default.web_returns
+Output [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70]
 Batched: true
 Location: InMemoryFileIndex []
-PartitionFilters: [isnotnull(wr_returned_date_sk#79), dynamicpruningexpression(wr_returned_date_sk#79 IN dynamicpruning#5)]
+PartitionFilters: [isnotnull(wr_returned_date_sk#70), dynamicpruningexpression(wr_returned_date_sk#70 IN dynamicpruning#5)]
 PushedFilters: [IsNotNull(wr_web_page_sk)]
 ReadSchema: struct<wr_web_page_sk:int,wr_return_amt:decimal(7,2),wr_net_loss:decimal(7,2)>
 
-(66) ColumnarToRow [codegen id : 20]
-Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79]
+(60) ColumnarToRow [codegen id : 18]
+Input [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70]
 
-(67) Filter [codegen id : 20]
-Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79]
-Condition : isnotnull(wr_web_page_sk#76)
+(61) Filter [codegen id : 18]
+Input [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70]
+Condition : isnotnull(wr_web_page_sk#67)
 
-(68) ReusedExchange [Reuses operator id: 134]
-Output [1]: [d_date_sk#80]
+(62) ReusedExchange [Reuses operator id: 128]
+Output [1]: [d_date_sk#71]
 
-(69) BroadcastHashJoin [codegen id : 20]
-Left keys [1]: [wr_returned_date_sk#79]
-Right keys [1]: [d_date_sk#80]
+(63) BroadcastHashJoin [codegen id : 18]
+Left keys [1]: [wr_returned_date_sk#70]
+Right keys [1]: [d_date_sk#71]
 Join type: Inner
 Join condition: None
 
-(70) Project [codegen id : 20]
-Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78]
-Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80]
+(64) Project [codegen id : 18]
+Output [3]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69]
+Input [5]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70, d_date_sk#71]
 
-(71) ReusedExchange [Reuses operator id: 59]
-Output [1]: [wp_web_page_sk#81]
+(65) ReusedExchange [Reuses operator id: 53]
+Output [1]: [wp_web_page_sk#72]
 
-(72) BroadcastHashJoin [codegen id : 20]
-Left keys [1]: [wr_web_page_sk#76]
-Right keys [1]: [wp_web_page_sk#81]
+(66) BroadcastHashJoin [codegen id : 18]
+Left keys [1]: [wr_web_page_sk#67]
+Right keys [1]: [wp_web_page_sk#72]
 Join type: Inner
 Join condition: None
 
-(73) Project [codegen id : 20]
-Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81]
-Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81]
-
-(74) HashAggregate [codegen id : 20]
-Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81]
-Keys [1]: [wp_web_page_sk#81]
-Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))]
-Aggregate Attributes [2]: [sum#82, sum#83]
-Results [3]: [wp_web_page_sk#81, sum#84, sum#85]
-
-(75) Exchange
-Input [3]: [wp_web_page_sk#81, sum#84, sum#85]
-Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=10]
-
-(76) HashAggregate [codegen id : 21]
-Input [3]: [wp_web_page_sk#81, sum#84, sum#85]
-Keys [1]: [wp_web_page_sk#81]
-Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))]
-Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87]
-Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89]
-
-(77) BroadcastExchange
-Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89]
+(67) Project [codegen id : 18]
+Output [3]: [wr_return_amt#68, wr_net_loss#69, wp_web_page_sk#72]
+Input [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wp_web_page_sk#72]
+
+(68) HashAggregate [codegen id : 18]
+Input [3]: [wr_return_amt#68, wr_net_loss#69, wp_web_page_sk#72]
+Keys [1]: [wp_web_page_sk#72]
+Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#68)), partial_sum(UnscaledValue(wr_net_loss#69))]
+Aggregate Attributes [2]: [sum#73, sum#74]
+Results [3]: [wp_web_page_sk#72, sum#75, sum#76]
+
+(69) Exchange
+Input [3]: [wp_web_page_sk#72, sum#75, sum#76]
+Arguments: hashpartitioning(wp_web_page_sk#72, 5), ENSURE_REQUIREMENTS, [plan_id=10]
+
+(70) HashAggregate [codegen id : 19]
+Input [3]: [wp_web_page_sk#72, sum#75, sum#76]
+Keys [1]: [wp_web_page_sk#72]
+Functions [2]: [sum(UnscaledValue(wr_return_amt#68)), sum(UnscaledValue(wr_net_loss#69))]
+Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#68))#77, sum(UnscaledValue(wr_net_loss#69))#78]
+Results [3]: [wp_web_page_sk#72, MakeDecimal(sum(UnscaledValue(wr_return_amt#68))#77,17,2) AS returns#79, MakeDecimal(sum(UnscaledValue(wr_net_loss#69))#78,17,2) AS profit_loss#80]
+
+(71) BroadcastExchange
+Input [3]: [wp_web_page_sk#72, returns#79, profit_loss#80]
 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11]
 
-(78) BroadcastHashJoin [codegen id : 22]
-Left keys [1]: [wp_web_page_sk#67]
-Right keys [1]: [wp_web_page_sk#81]
+(72) BroadcastHashJoin [codegen id : 20]
+Left keys [1]: [wp_web_page_sk#58]
+Right keys [1]: [wp_web_page_sk#72]
 Join type: LeftOuter
 Join condition: None
 
-(79) Project [codegen id : 22]
-Output [5]: [web channel AS channel#90, wp_web_page_sk#67 AS id#91, sales#74, coalesce(returns#88, 0.00) AS returns#92, (profit#75 - coalesce(profit_loss#89, 0.00)) AS profit#93]
-Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89]
+(73) Project [codegen id : 20]
+Output [5]: [web channel AS channel#81, wp_web_page_sk#58 AS id#82, sales#65, coalesce(returns#79, 0.00) AS returns#83, (profit#66 - coalesce(profit_loss#80, 0.00)) AS profit#84]
+Input [6]: [wp_web_page_sk#58, sales#65, profit#66, wp_web_page_sk#72, returns#79, profit_loss#80]
 
-(80) Union
+(74) Union
 
-(81) HashAggregate [codegen id : 23]
+(75) HashAggregate [codegen id : 21]
 Input [5]: [channel#30, id#31, sales#14, returns#32, profit#33]
 Keys [2]: [channel#30, id#31]
 Functions [3]: [partial_sum(sales#14), partial_sum(returns#32), partial_sum(profit#33)]
-Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99]
-Results [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105]
+Aggregate Attributes [6]: [sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90]
+Results [8]: [channel#30, id#31, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96]
 
-(82) HashAggregate [codegen id : 23]
-Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105]
+(76) HashAggregate [codegen id : 21]
+Input [8]: [channel#30, id#31, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96]
 Keys [2]: [channel#30, id#31]
 Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)]
-Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108]
-Results [5]: [channel#30, id#31, cast(sum(sales#14)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(37,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111]
+Aggregate Attributes [3]: [sum(sales#14)#97, sum(returns#32)#98, sum(profit#33)#99]
+Results [5]: [channel#30, id#31, cast(sum(sales#14)#97 as decimal(37,2)) AS sales#100, cast(sum(returns#32)#98 as decimal(37,2)) AS returns#101, cast(sum(profit#33)#99 as decimal(38,2)) AS profit#102]
 
-(83) ReusedExchange [Reuses operator id: 14]
-Output [3]: [s_store_sk#112, sum#113, sum#114]
+(77) ReusedExchange [Reuses operator id: 14]
+Output [3]: [s_store_sk#103, sum#104, sum#105]
 
-(84) HashAggregate [codegen id : 31]
-Input [3]: [s_store_sk#112, sum#113, sum#114]
-Keys [1]: [s_store_sk#112]
-Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#115)), sum(UnscaledValue(ss_net_profit#116))]
-Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#115))#12, sum(UnscaledValue(ss_net_profit#116))#13]
-Results [3]: [s_store_sk#112, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#115))#12,17,2) AS sales#117, MakeDecimal(sum(UnscaledValue(ss_net_profit#116))#13,17,2) AS profit#118]
+(78) HashAggregate [codegen id : 29]
+Input [3]: [s_store_sk#103, sum#104, sum#105]
+Keys [1]: [s_store_sk#103]
+Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#106)), sum(UnscaledValue(ss_net_profit#107))]
+Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#106))#12, sum(UnscaledValue(ss_net_profit#107))#13]
+Results [3]: [s_store_sk#103, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#106))#12,17,2) AS sales#108, MakeDecimal(sum(UnscaledValue(ss_net_profit#107))#13,17,2) AS profit#109]
 
-(85) ReusedExchange [Reuses operator id: 28]
-Output [3]: [s_store_sk#119, returns#120, profit_loss#121]
+(79) ReusedExchange [Reuses operator id: 28]
+Output [3]: [s_store_sk#110, returns#111, profit_loss#112]
 
-(86) BroadcastHashJoin [codegen id : 31]
-Left keys [1]: [s_store_sk#112]
-Right keys [1]: [s_store_sk#119]
+(80) BroadcastHashJoin [codegen id : 29]
+Left keys [1]: [s_store_sk#103]
+Right keys [1]: [s_store_sk#110]
 Join type: LeftOuter
 Join condition: None
 
-(87) Project [codegen id : 31]
-Output [5]: [store channel AS channel#122, s_store_sk#112 AS id#123, sales#117, coalesce(returns#120, 0.00) AS returns#124, (profit#118 - coalesce(profit_loss#121, 0.00)) AS profit#125]
-Input [6]: [s_store_sk#112, sales#117, profit#118, s_store_sk#119, returns#120, profit_loss#121]
+(81) Project [codegen id : 29]
+Output [5]: [store channel AS channel#113, s_store_sk#103 AS id#114, sales#108, coalesce(returns#111, 0.00) AS returns#115, (profit#109 - coalesce(profit_loss#112, 0.00)) AS profit#116]
+Input [6]: [s_store_sk#103, sales#108, profit#109, s_store_sk#110, returns#111, profit_loss#112]
 
-(88) ReusedExchange [Reuses operator id: 37]
-Output [3]: [cs_call_center_sk#126, sum#127, sum#128]
+(82) ReusedExchange [Reuses operator id: 37]
+Output [3]: [cs_call_center_sk#117, sum#118, sum#119]
 
-(89) HashAggregate [codegen id : 37]
-Input [3]: [cs_call_center_sk#126, sum#127, sum#128]
-Keys [1]: [cs_call_center_sk#126]
-Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#129)), sum(UnscaledValue(cs_net_profit#130))]
-Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#129))#43, sum(UnscaledValue(cs_net_profit#130))#44]
-Results [3]: [cs_call_center_sk#126, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#129))#43,17,2) AS sales#131, MakeDecimal(sum(UnscaledValue(cs_net_profit#130))#44,17,2) AS profit#132]
+(83) HashAggregate [codegen id : 33]
+Input [3]: [cs_call_center_sk#117, sum#118, sum#119]
+Keys [1]: [cs_call_center_sk#117]
+Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#120)), sum(UnscaledValue(cs_net_profit#121))]
+Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#120))#43, sum(UnscaledValue(cs_net_profit#121))#44]
+Results [3]: [cs_call_center_sk#117, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#120))#43,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(cs_net_profit#121))#44,17,2) AS profit#123]
 
-(90) ReusedExchange [Reuses operator id: 47]
-Output [2]: [returns#133, profit_loss#134]
+(84) ReusedExchange [Reuses operator id: 41]
+Output [2]: [returns#124, profit_loss#125]
 
-(91) BroadcastNestedLoopJoin [codegen id : 37]
+(85) BroadcastNestedLoopJoin [codegen id : 33]
 Join type: Inner
 Join condition: None
 
-(92) Project [codegen id : 37]
-Output [5]: [catalog channel AS channel#135, cs_call_center_sk#126 AS id#136, sales#131, returns#133, (profit#132 - profit_loss#134) AS profit#137]
-Input [5]: [cs_call_center_sk#126, sales#131, profit#132, returns#133, profit_loss#134]
+(86) Project [codegen id : 33]
+Output [5]: [catalog channel AS channel#126, cs_call_center_sk#117 AS id#127, sales#122, returns#124, (profit#123 - profit_loss#125) AS profit#128]
+Input [5]: [cs_call_center_sk#117, sales#122, profit#123, returns#124, profit_loss#125]
 
-(93) ReusedExchange [Reuses operator id: 63]
-Output [3]: [wp_web_page_sk#138, sum#139, sum#140]
+(87) ReusedExchange [Reuses operator id: 57]
+Output [3]: [wp_web_page_sk#129, sum#130, sum#131]
 
-(94) HashAggregate [codegen id : 45]
-Input [3]: [wp_web_page_sk#138, sum#139, sum#140]
-Keys [1]: [wp_web_page_sk#138]
-Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#141)), sum(UnscaledValue(ws_net_profit#142))]
-Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#141))#72, sum(UnscaledValue(ws_net_profit#142))#73]
-Results [3]: [wp_web_page_sk#138, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#141))#72,17,2) AS sales#143, MakeDecimal(sum(UnscaledValue(ws_net_profit#142))#73,17,2) AS profit#144]
+(88) HashAggregate [codegen id : 41]
+Input [3]: [wp_web_page_sk#129, sum#130, sum#131]
+Keys [1]: [wp_web_page_sk#129]
+Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#132)), sum(UnscaledValue(ws_net_profit#133))]
+Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#132))#63, sum(UnscaledValue(ws_net_profit#133))#64]
+Results [3]: [wp_web_page_sk#129, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#132))#63,17,2) AS sales#134, MakeDecimal(sum(UnscaledValue(ws_net_profit#133))#64,17,2) AS profit#135]
 
-(95) ReusedExchange [Reuses operator id: 77]
-Output [3]: [wp_web_page_sk#145, returns#146, profit_loss#147]
+(89) ReusedExchange [Reuses operator id: 71]
+Output [3]: [wp_web_page_sk#136, returns#137, profit_loss#138]
 
-(96) BroadcastHashJoin [codegen id : 45]
-Left keys [1]: [wp_web_page_sk#138]
-Right keys [1]: [wp_web_page_sk#145]
+(90) BroadcastHashJoin [codegen id : 41]
+Left keys [1]: [wp_web_page_sk#129]
+Right keys [1]: [wp_web_page_sk#136]
 Join type: LeftOuter
 Join condition: None
 
-(97) Project [codegen id : 45]
-Output [5]: [web channel AS channel#148, wp_web_page_sk#138 AS id#149, sales#143, coalesce(returns#146, 0.00) AS returns#150, (profit#144 - coalesce(profit_loss#147, 0.00)) AS profit#151]
-Input [6]: [wp_web_page_sk#138, sales#143, profit#144, wp_web_page_sk#145, returns#146, profit_loss#147]
-
-(98) Union
-
-(99) HashAggregate [codegen id : 46]
-Input [5]: [channel#122, id#123, sales#117, returns#124, profit#125]
-Keys [2]: [channel#122, id#123]
-Functions [3]: [partial_sum(sales#117), partial_sum(returns#124), partial_sum(profit#125)]
-Aggregate Attributes [6]: [sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157]
-Results [8]: [channel#122, id#123, sum#158, isEmpty#159, sum#160, isEmpty#161, sum#162, isEmpty#163]
-
-(100) HashAggregate [codegen id : 46]
-Input [8]: [channel#122, id#123, sum#158, isEmpty#159, sum#160, isEmpty#161, sum#162, isEmpty#163]
-Keys [2]: [channel#122, id#123]
-Functions [3]: [sum(sales#117), sum(returns#124), sum(profit#125)]
-Aggregate Attributes [3]: [sum(sales#117)#106, sum(returns#124)#107, sum(profit#125)#108]
-Results [4]: [channel#122, sum(sales#117)#106 AS sales#164, sum(returns#124)#107 AS returns#165, sum(profit#125)#108 AS profit#166]
-
-(101) HashAggregate [codegen id : 46]
-Input [4]: [channel#122, sales#164, returns#165, profit#166]
-Keys [1]: [channel#122]
-Functions [3]: [partial_sum(sales#164), partial_sum(returns#165), partial_sum(profit#166)]
-Aggregate Attributes [6]: [sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172]
-Results [7]: [channel#122, sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178]
-
-(102) Exchange
-Input [7]: [channel#122, sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178]
-Arguments: hashpartitioning(channel#122, 5), ENSURE_REQUIREMENTS, [plan_id=12]
-
-(103) HashAggregate [codegen id : 47]
-Input [7]: [channel#122, sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178]
-Keys [1]: [channel#122]
-Functions [3]: [sum(sales#164), sum(returns#165), sum(profit#166)]
-Aggregate Attributes [3]: [sum(sales#164)#179, sum(returns#165)#180, sum(profit#166)#181]
-Results [5]: [channel#122, null AS id#182, sum(sales#164)#179 AS sales#183, sum(returns#165)#180 AS returns#184, sum(profit#166)#181 AS profit#185]
-
-(104) ReusedExchange [Reuses operator id: 14]
-Output [3]: [s_store_sk#186, sum#187, sum#188]
-
-(105) HashAggregate [codegen id : 55]
-Input [3]: [s_store_sk#186, sum#187, sum#188]
-Keys [1]: [s_store_sk#186]
-Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#189)), sum(UnscaledValue(ss_net_profit#190))]
-Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#189))#12, sum(UnscaledValue(ss_net_profit#190))#13]
-Results [3]: [s_store_sk#186, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#189))#12,17,2) AS sales#191, MakeDecimal(sum(UnscaledValue(ss_net_profit#190))#13,17,2) AS profit#192]
-
-(106) ReusedExchange [Reuses operator id: 28]
-Output [3]: [s_store_sk#193, returns#194, profit_loss#195]
-
-(107) BroadcastHashJoin [codegen id : 55]
-Left keys [1]: [s_store_sk#186]
-Right keys [1]: [s_store_sk#193]
+(91) Project [codegen id : 41]
+Output [5]: [web channel AS channel#139, wp_web_page_sk#129 AS id#140, sales#134, coalesce(returns#137, 0.00) AS returns#141, (profit#135 - coalesce(profit_loss#138, 0.00)) AS profit#142]
+Input [6]: [wp_web_page_sk#129, sales#134, profit#135, wp_web_page_sk#136, returns#137, profit_loss#138]
+
+(92) Union
+
+(93) HashAggregate [codegen id : 42]
+Input [5]: [channel#113, id#114, sales#108, returns#115, profit#116]
+Keys [2]: [channel#113, id#114]
+Functions [3]: [partial_sum(sales#108), partial_sum(returns#115), partial_sum(profit#116)]
+Aggregate Attributes [6]: [sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148]
+Results [8]: [channel#113, id#114, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154]
+
+(94) HashAggregate [codegen id : 42]
+Input [8]: [channel#113, id#114, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154]
+Keys [2]: [channel#113, id#114]
+Functions [3]: [sum(sales#108), sum(returns#115), sum(profit#116)]
+Aggregate Attributes [3]: [sum(sales#108)#97, sum(returns#115)#98, sum(profit#116)#99]
+Results [4]: [channel#113, sum(sales#108)#97 AS sales#155, sum(returns#115)#98 AS returns#156, sum(profit#116)#99 AS profit#157]
+
+(95) HashAggregate [codegen id : 42]
+Input [4]: [channel#113, sales#155, returns#156, profit#157]
+Keys [1]: [channel#113]
+Functions [3]: [partial_sum(sales#155), partial_sum(returns#156), partial_sum(profit#157)]
+Aggregate Attributes [6]: [sum#158, isEmpty#159, sum#160, isEmpty#161, sum#162, isEmpty#163]
+Results [7]: [channel#113, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169]
+
+(96) Exchange
+Input [7]: [channel#113, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169]
+Arguments: hashpartitioning(channel#113, 5), ENSURE_REQUIREMENTS, [plan_id=12]
+
+(97) HashAggregate [codegen id : 43]
+Input [7]: [channel#113, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169]
+Keys [1]: [channel#113]
+Functions [3]: [sum(sales#155), sum(returns#156), sum(profit#157)]
+Aggregate Attributes [3]: [sum(sales#155)#170, sum(returns#156)#171, sum(profit#157)#172]
+Results [5]: [channel#113, null AS id#173, sum(sales#155)#170 AS sales#174, sum(returns#156)#171 AS returns#175, sum(profit#157)#172 AS profit#176]
+
+(98) ReusedExchange [Reuses operator id: 14]
+Output [3]: [s_store_sk#177, sum#178, sum#179]
+
+(99) HashAggregate [codegen id : 51]
+Input [3]: [s_store_sk#177, sum#178, sum#179]
+Keys [1]: [s_store_sk#177]
+Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#180)), sum(UnscaledValue(ss_net_profit#181))]
+Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#180))#12, sum(UnscaledValue(ss_net_profit#181))#13]
+Results [3]: [s_store_sk#177, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#180))#12,17,2) AS sales#182, MakeDecimal(sum(UnscaledValue(ss_net_profit#181))#13,17,2) AS profit#183]
+
+(100) ReusedExchange [Reuses operator id: 28]
+Output [3]: [s_store_sk#184, returns#185, profit_loss#186]
+
+(101) BroadcastHashJoin [codegen id : 51]
+Left keys [1]: [s_store_sk#177]
+Right keys [1]: [s_store_sk#184]
 Join type: LeftOuter
 Join condition: None
 
-(108) Project [codegen id : 55]
-Output [5]: [store channel AS channel#196, s_store_sk#186 AS id#197, sales#191, coalesce(returns#194, 0.00) AS returns#198, (profit#192 - coalesce(profit_loss#195, 0.00)) AS profit#199]
-Input [6]: [s_store_sk#186, sales#191, profit#192, s_store_sk#193, returns#194, profit_loss#195]
+(102) Project [codegen id : 51]
+Output [5]: [store channel AS channel#187, s_store_sk#177 AS id#188, sales#182, coalesce(returns#185, 0.00) AS returns#189, (profit#183 - coalesce(profit_loss#186, 0.00)) AS profit#190]
+Input [6]: [s_store_sk#177, sales#182, profit#183, s_store_sk#184, returns#185, profit_loss#186]
 
-(109) ReusedExchange [Reuses operator id: 37]
-Output [3]: [cs_call_center_sk#200, sum#201, sum#202]
+(103) ReusedExchange [Reuses operator id: 37]
+Output [3]: [cs_call_center_sk#191, sum#192, sum#193]
 
-(110) HashAggregate [codegen id : 61]
-Input [3]: [cs_call_center_sk#200, sum#201, sum#202]
-Keys [1]: [cs_call_center_sk#200]
-Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#203)), sum(UnscaledValue(cs_net_profit#204))]
-Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#203))#43, sum(UnscaledValue(cs_net_profit#204))#44]
-Results [3]: [cs_call_center_sk#200, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#203))#43,17,2) AS sales#205, MakeDecimal(sum(UnscaledValue(cs_net_profit#204))#44,17,2) AS profit#206]
+(104) HashAggregate [codegen id : 55]
+Input [3]: [cs_call_center_sk#191, sum#192, sum#193]
+Keys [1]: [cs_call_center_sk#191]
+Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#194)), sum(UnscaledValue(cs_net_profit#195))]
+Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#194))#43, sum(UnscaledValue(cs_net_profit#195))#44]
+Results [3]: [cs_call_center_sk#191, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#194))#43,17,2) AS sales#196, MakeDecimal(sum(UnscaledValue(cs_net_profit#195))#44,17,2) AS profit#197]
 
-(111) ReusedExchange [Reuses operator id: 47]
-Output [2]: [returns#207, profit_loss#208]
+(105) ReusedExchange [Reuses operator id: 41]
+Output [2]: [returns#198, profit_loss#199]
 
-(112) BroadcastNestedLoopJoin [codegen id : 61]
+(106) BroadcastNestedLoopJoin [codegen id : 55]
 Join type: Inner
 Join condition: None
 
-(113) Project [codegen id : 61]
-Output [5]: [catalog channel AS channel#209, cs_call_center_sk#200 AS id#210, sales#205, returns#207, (profit#206 - profit_loss#208) AS profit#211]
-Input [5]: [cs_call_center_sk#200, sales#205, profit#206, returns#207, profit_loss#208]
+(107) Project [codegen id : 55]
+Output [5]: [catalog channel AS channel#200, cs_call_center_sk#191 AS id#201, sales#196, returns#198, (profit#197 - profit_loss#199) AS profit#202]
+Input [5]: [cs_call_center_sk#191, sales#196, profit#197, returns#198, profit_loss#199]
 
-(114) ReusedExchange [Reuses operator id: 63]
-Output [3]: [wp_web_page_sk#212, sum#213, sum#214]
+(108) ReusedExchange [Reuses operator id: 57]
+Output [3]: [wp_web_page_sk#203, sum#204, sum#205]
 
-(115) HashAggregate [codegen id : 69]
-Input [3]: [wp_web_page_sk#212, sum#213, sum#214]
-Keys [1]: [wp_web_page_sk#212]
-Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#215)), sum(UnscaledValue(ws_net_profit#216))]
-Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#215))#72, sum(UnscaledValue(ws_net_profit#216))#73]
-Results [3]: [wp_web_page_sk#212, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#215))#72,17,2) AS sales#217, MakeDecimal(sum(UnscaledValue(ws_net_profit#216))#73,17,2) AS profit#218]
+(109) HashAggregate [codegen id : 63]
+Input [3]: [wp_web_page_sk#203, sum#204, sum#205]
+Keys [1]: [wp_web_page_sk#203]
+Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#206)), sum(UnscaledValue(ws_net_profit#207))]
+Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#206))#63, sum(UnscaledValue(ws_net_profit#207))#64]
+Results [3]: [wp_web_page_sk#203, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#206))#63,17,2) AS sales#208, MakeDecimal(sum(UnscaledValue(ws_net_profit#207))#64,17,2) AS profit#209]
 
-(116) ReusedExchange [Reuses operator id: 77]
-Output [3]: [wp_web_page_sk#219, returns#220, profit_loss#221]
+(110) ReusedExchange [Reuses operator id: 71]
+Output [3]: [wp_web_page_sk#210, returns#211, profit_loss#212]
 
-(117) BroadcastHashJoin [codegen id : 69]
-Left keys [1]: [wp_web_page_sk#212]
-Right keys [1]: [wp_web_page_sk#219]
+(111) BroadcastHashJoin [codegen id : 63]
+Left keys [1]: [wp_web_page_sk#203]
+Right keys [1]: [wp_web_page_sk#210]
 Join type: LeftOuter
 Join condition: None
 
-(118) Project [codegen id : 69]
-Output [5]: [web channel AS channel#222, wp_web_page_sk#212 AS id#223, sales#217, coalesce(returns#220, 0.00) AS returns#224, (profit#218 - coalesce(profit_loss#221, 0.00)) AS profit#225]
-Input [6]: [wp_web_page_sk#212, sales#217, profit#218, wp_web_page_sk#219, returns#220, profit_loss#221]
+(112) Project [codegen id : 63]
+Output [5]: [web channel AS channel#213, wp_web_page_sk#203 AS id#214, sales#208, coalesce(returns#211, 0.00) AS returns#215, (profit#209 - coalesce(profit_loss#212, 0.00)) AS profit#216]
+Input [6]: [wp_web_page_sk#203, sales#208, profit#209, wp_web_page_sk#210, returns#211, profit_loss#212]
 
-(119) Union
+(113) Union
+
+(114) HashAggregate [codegen id : 64]
+Input [5]: [channel#187, id#188, sales#182, returns#189, profit#190]
+Keys [2]: [channel#187, id#188]
+Functions [3]: [partial_sum(sales#182), partial_sum(returns#189), partial_sum(profit#190)]
+Aggregate Attributes [6]: [sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222]
+Results [8]: [channel#187, id#188, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228]
+
+(115) HashAggregate [codegen id : 64]
+Input [8]: [channel#187, id#188, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228]
+Keys [2]: [channel#187, id#188]
+Functions [3]: [sum(sales#182), sum(returns#189), sum(profit#190)]
+Aggregate Attributes [3]: [sum(sales#182)#97, sum(returns#189)#98, sum(profit#190)#99]
+Results [3]: [sum(sales#182)#97 AS sales#229, sum(returns#189)#98 AS returns#230, sum(profit#190)#99 AS profit#231]
 
-(120) HashAggregate [codegen id : 70]
-Input [5]: [channel#196, id#197, sales#191, returns#198, profit#199]
-Keys [2]: [channel#196, id#197]
-Functions [3]: [partial_sum(sales#191), partial_sum(returns#198), partial_sum(profit#199)]
-Aggregate Attributes [6]: [sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231]
-Results [8]: [channel#196, id#197, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237]
-
-(121) HashAggregate [codegen id : 70]
-Input [8]: [channel#196, id#197, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237]
-Keys [2]: [channel#196, id#197]
-Functions [3]: [sum(sales#191), sum(returns#198), sum(profit#199)]
-Aggregate Attributes [3]: [sum(sales#191)#106, sum(returns#198)#107, sum(profit#199)#108]
-Results [3]: [sum(sales#191)#106 AS sales#238, sum(returns#198)#107 AS returns#239, sum(profit#199)#108 AS profit#240]
-
-(122) HashAggregate [codegen id : 70]
-Input [3]: [sales#238, returns#239, profit#240]
+(116) HashAggregate [codegen id : 64]
+Input [3]: [sales#229, returns#230, profit#231]
 Keys: []
-Functions [3]: [partial_sum(sales#238), partial_sum(returns#239), partial_sum(profit#240)]
-Aggregate Attributes [6]: [sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246]
-Results [6]: [sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252]
+Functions [3]: [partial_sum(sales#229), partial_sum(returns#230), partial_sum(profit#231)]
+Aggregate Attributes [6]: [sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237]
+Results [6]: [sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243]
 
-(123) Exchange
-Input [6]: [sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252]
+(117) Exchange
+Input [6]: [sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243]
 Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13]
 
-(124) HashAggregate [codegen id : 71]
-Input [6]: [sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252]
+(118) HashAggregate [codegen id : 65]
+Input [6]: [sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243]
 Keys: []
-Functions [3]: [sum(sales#238), sum(returns#239), sum(profit#240)]
-Aggregate Attributes [3]: [sum(sales#238)#253, sum(returns#239)#254, sum(profit#240)#255]
-Results [5]: [null AS channel#256, null AS id#257, sum(sales#238)#253 AS sales#258, sum(returns#239)#254 AS returns#259, sum(profit#240)#255 AS profit#260]
+Functions [3]: [sum(sales#229), sum(returns#230), sum(profit#231)]
+Aggregate Attributes [3]: [sum(sales#229)#244, sum(returns#230)#245, sum(profit#231)#246]
+Results [5]: [null AS channel#247, null AS id#248, sum(sales#229)#244 AS sales#249, sum(returns#230)#245 AS returns#250, sum(profit#231)#246 AS profit#251]
 
-(125) Union
+(119) Union
 
-(126) HashAggregate [codegen id : 72]
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+(120) HashAggregate [codegen id : 66]
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Keys [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 Functions: []
 Aggregate Attributes: []
-Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+Results [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 
-(127) Exchange
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Arguments: hashpartitioning(channel#30, id#31, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=14]
+(121) Exchange
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Arguments: hashpartitioning(channel#30, id#31, sales#100, returns#101, profit#102, 5), ENSURE_REQUIREMENTS, [plan_id=14]
 
-(128) HashAggregate [codegen id : 73]
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+(122) HashAggregate [codegen id : 67]
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Keys [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 Functions: []
 Aggregate Attributes: []
-Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+Results [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 
-(129) TakeOrderedAndProject
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#109, returns#110, profit#111]
+(123) TakeOrderedAndProject
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#100, returns#101, profit#102]
 
 ===== Subqueries =====
 
 Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5
-BroadcastExchange (134)
-+- * Project (133)
-   +- * Filter (132)
-      +- * ColumnarToRow (131)
-         +- Scan parquet spark_catalog.default.date_dim (130)
+BroadcastExchange (128)
++- * Project (127)
+   +- * Filter (126)
+      +- * ColumnarToRow (125)
+         +- Scan parquet spark_catalog.default.date_dim (124)
 
 
-(130) Scan parquet spark_catalog.default.date_dim
-Output [2]: [d_date_sk#6, d_date#261]
+(124) Scan parquet spark_catalog.default.date_dim
+Output [2]: [d_date_sk#6, d_date#252]
 Batched: true
 Location [not included in comparison]/{warehouse_dir}/date_dim]
 PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)]
 ReadSchema: struct<d_date_sk:int,d_date:date>
 
-(131) ColumnarToRow [codegen id : 1]
-Input [2]: [d_date_sk#6, d_date#261]
+(125) ColumnarToRow [codegen id : 1]
+Input [2]: [d_date_sk#6, d_date#252]
 
-(132) Filter [codegen id : 1]
-Input [2]: [d_date_sk#6, d_date#261]
-Condition : (((isnotnull(d_date#261) AND (d_date#261 >= 1998-08-04)) AND (d_date#261 <= 1998-09-03)) AND isnotnull(d_date_sk#6))
+(126) Filter [codegen id : 1]
+Input [2]: [d_date_sk#6, d_date#252]
+Condition : (((isnotnull(d_date#252) AND (d_date#252 >= 1998-08-04)) AND (d_date#252 <= 1998-09-03)) AND isnotnull(d_date_sk#6))
 
-(133) Project [codegen id : 1]
+(127) Project [codegen id : 1]
 Output [1]: [d_date_sk#6]
-Input [2]: [d_date_sk#6, d_date#261]
+Input [2]: [d_date_sk#6, d_date#252]
 
-(134) BroadcastExchange
+(128) BroadcastExchange
 Input [1]: [d_date_sk#6]
 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15]
 
@@ -796,10 +756,69 @@ Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#19
 
 Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#37 IN dynamicpruning#5
 
-Subquery:4 Hosting operator id = 39 Hosting Expression = cr_returned_date_sk#49 IN dynamicpruning#5
+Subquery:4 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#47, [id=#6]
+* Project (137)
++- * HashAggregate (136)
+   +- Exchange (135)
+      +- * HashAggregate (134)
+         +- * Project (133)
+            +- * BroadcastHashJoin Inner BuildRight (132)
+               :- * ColumnarToRow (130)
+               :  +- Scan parquet spark_catalog.default.catalog_returns (129)
+               +- ReusedExchange (131)
+
+
+(129) Scan parquet spark_catalog.default.catalog_returns
+Output [3]: [cr_return_amount#253, cr_net_loss#254, cr_returned_date_sk#255]
+Batched: true
+Location: InMemoryFileIndex []
+PartitionFilters: [isnotnull(cr_returned_date_sk#255), dynamicpruningexpression(cr_returned_date_sk#255 IN dynamicpruning#5)]
+ReadSchema: struct<cr_return_amount:decimal(7,2),cr_net_loss:decimal(7,2)>
+
+(130) ColumnarToRow [codegen id : 2]
+Input [3]: [cr_return_amount#253, cr_net_loss#254, cr_returned_date_sk#255]
+
+(131) ReusedExchange [Reuses operator id: 128]
+Output [1]: [d_date_sk#256]
+
+(132) BroadcastHashJoin [codegen id : 2]
+Left keys [1]: [cr_returned_date_sk#255]
+Right keys [1]: [d_date_sk#256]
+Join type: Inner
+Join condition: None
+
+(133) Project [codegen id : 2]
+Output [2]: [cr_return_amount#253, cr_net_loss#254]
+Input [4]: [cr_return_amount#253, cr_net_loss#254, cr_returned_date_sk#255, d_date_sk#256]
+
+(134) HashAggregate [codegen id : 2]
+Input [2]: [cr_return_amount#253, cr_net_loss#254]
+Keys: []
+Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#253)), partial_sum(UnscaledValue(cr_net_loss#254))]
+Aggregate Attributes [2]: [sum#257, sum#258]
+Results [2]: [sum#259, sum#260]
+
+(135) Exchange
+Input [2]: [sum#259, sum#260]
+Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16]
+
+(136) HashAggregate [codegen id : 3]
+Input [2]: [sum#259, sum#260]
+Keys: []
+Functions [2]: [sum(UnscaledValue(cr_return_amount#253)), sum(UnscaledValue(cr_net_loss#254))]
+Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#253))#261, sum(UnscaledValue(cr_net_loss#254))#262]
+Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#253))#261,17,2) AS returns#48, MakeDecimal(sum(UnscaledValue(cr_net_loss#254))#262,17,2) AS profit_loss#49]
+
+(137) Project [codegen id : 3]
+Output [1]: [named_struct(returns, returns#48, profit_loss, profit_loss#49) AS mergedValue#263]
+Input [2]: [returns#48, profit_loss#49]
+
+Subquery:5 Hosting operator id = 129 Hosting Expression = cr_returned_date_sk#255 IN dynamicpruning#5
+
+Subquery:6 Hosting operator id = 40 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#6]
 
-Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#5
+Subquery:7 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#56 IN dynamicpruning#5
 
-Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#79 IN dynamicpruning#5
+Subquery:8 Hosting operator id = 59 Hosting Expression = wr_returned_date_sk#70 IN dynamicpruning#5
 
 
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt
index e82c8494b698d..dcbbecaee5965 100644
--- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt
+++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt
@@ -1,13 +1,13 @@
 TakeOrderedAndProject [channel,id,sales,returns,profit]
-  WholeStageCodegen (73)
+  WholeStageCodegen (67)
     HashAggregate [channel,id,sales,returns,profit]
       InputAdapter
         Exchange [channel,id,sales,returns,profit] #1
-          WholeStageCodegen (72)
+          WholeStageCodegen (66)
             HashAggregate [channel,id,sales,returns,profit]
               InputAdapter
                 Union
-                  WholeStageCodegen (23)
+                  WholeStageCodegen (21)
                     HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                       HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                         InputAdapter
@@ -66,7 +66,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                             ReusedExchange [d_date_sk] #3
                                                       InputAdapter
                                                         ReusedExchange [s_store_sk] #4
-                            WholeStageCodegen (14)
+                            WholeStageCodegen (12)
                               Project [cs_call_center_sk,sales,returns,profit,profit_loss]
                                 BroadcastNestedLoopJoin
                                   HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum]
@@ -84,27 +84,33 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                   ReusedExchange [d_date_sk] #3
                                   InputAdapter
                                     BroadcastExchange #8
-                                      WholeStageCodegen (13)
-                                        HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum]
-                                          InputAdapter
-                                            Exchange #9
-                                              WholeStageCodegen (12)
-                                                HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum]
-                                                  Project [cr_return_amount,cr_net_loss]
-                                                    BroadcastHashJoin [cr_returned_date_sk,d_date_sk]
-                                                      ColumnarToRow
-                                                        InputAdapter
-                                                          Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk]
-                                                            ReusedSubquery [d_date_sk] #1
-                                                      InputAdapter
-                                                        ReusedExchange [d_date_sk] #3
-                            WholeStageCodegen (22)
+                                      WholeStageCodegen (11)
+                                        Project
+                                          Subquery #2
+                                            WholeStageCodegen (3)
+                                              Project [returns,profit_loss]
+                                                HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum]
+                                                  InputAdapter
+                                                    Exchange #9
+                                                      WholeStageCodegen (2)
+                                                        HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum]
+                                                          Project [cr_return_amount,cr_net_loss]
+                                                            BroadcastHashJoin [cr_returned_date_sk,d_date_sk]
+                                                              ColumnarToRow
+                                                                InputAdapter
+                                                                  Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk]
+                                                                    ReusedSubquery [d_date_sk] #1
+                                                              InputAdapter
+                                                                ReusedExchange [d_date_sk] #3
+                                          ReusedSubquery [mergedValue] #2
+                                          Scan OneRowRelation
+                            WholeStageCodegen (20)
                               Project [wp_web_page_sk,sales,returns,profit,profit_loss]
                                 BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk]
                                   HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum]
                                     InputAdapter
                                       Exchange [wp_web_page_sk] #10
-                                        WholeStageCodegen (17)
+                                        WholeStageCodegen (15)
                                           HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum]
                                             Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk]
                                               BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
@@ -119,18 +125,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                       ReusedExchange [d_date_sk] #3
                                                 InputAdapter
                                                   BroadcastExchange #11
-                                                    WholeStageCodegen (16)
+                                                    WholeStageCodegen (14)
                                                       Filter [wp_web_page_sk]
                                                         ColumnarToRow
                                                           InputAdapter
                                                             Scan parquet spark_catalog.default.web_page [wp_web_page_sk]
                                   InputAdapter
                                     BroadcastExchange #12
-                                      WholeStageCodegen (21)
+                                      WholeStageCodegen (19)
                                         HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum]
                                           InputAdapter
                                             Exchange [wp_web_page_sk] #13
-                                              WholeStageCodegen (20)
+                                              WholeStageCodegen (18)
                                                 HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum]
                                                   Project [wr_return_amt,wr_net_loss,wp_web_page_sk]
                                                     BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk]
@@ -145,17 +151,17 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                             ReusedExchange [d_date_sk] #3
                                                       InputAdapter
                                                         ReusedExchange [wp_web_page_sk] #11
-                  WholeStageCodegen (47)
+                  WholeStageCodegen (43)
                     HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                       InputAdapter
                         Exchange [channel] #14
-                          WholeStageCodegen (46)
+                          WholeStageCodegen (42)
                             HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                               HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                                 HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                                   InputAdapter
                                     Union
-                                      WholeStageCodegen (31)
+                                      WholeStageCodegen (29)
                                         Project [s_store_sk,sales,returns,profit,profit_loss]
                                           BroadcastHashJoin [s_store_sk,s_store_sk]
                                             HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum]
@@ -163,7 +169,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                 ReusedExchange [s_store_sk,sum,sum] #2
                                             InputAdapter
                                               ReusedExchange [s_store_sk,returns,profit_loss] #5
-                                      WholeStageCodegen (37)
+                                      WholeStageCodegen (33)
                                         Project [cs_call_center_sk,sales,returns,profit,profit_loss]
                                           BroadcastNestedLoopJoin
                                             HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum]
@@ -171,7 +177,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                 ReusedExchange [cs_call_center_sk,sum,sum] #7
                                             InputAdapter
                                               ReusedExchange [returns,profit_loss] #8
-                                      WholeStageCodegen (45)
+                                      WholeStageCodegen (41)
                                         Project [wp_web_page_sk,sales,returns,profit,profit_loss]
                                           BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk]
                                             HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum]
@@ -179,17 +185,17 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                 ReusedExchange [wp_web_page_sk,sum,sum] #10
                                             InputAdapter
                                               ReusedExchange [wp_web_page_sk,returns,profit_loss] #12
-                  WholeStageCodegen (71)
+                  WholeStageCodegen (65)
                     HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                       InputAdapter
                         Exchange #15
-                          WholeStageCodegen (70)
+                          WholeStageCodegen (64)
                             HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                               HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                                 HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                                   InputAdapter
                                     Union
-                                      WholeStageCodegen (55)
+                                      WholeStageCodegen (51)
                                         Project [s_store_sk,sales,returns,profit,profit_loss]
                                           BroadcastHashJoin [s_store_sk,s_store_sk]
                                             HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum]
@@ -197,7 +203,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                 ReusedExchange [s_store_sk,sum,sum] #2
                                             InputAdapter
                                               ReusedExchange [s_store_sk,returns,profit_loss] #5
-                                      WholeStageCodegen (61)
+                                      WholeStageCodegen (55)
                                         Project [cs_call_center_sk,sales,returns,profit,profit_loss]
                                           BroadcastNestedLoopJoin
                                             HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum]
@@ -205,7 +211,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                 ReusedExchange [cs_call_center_sk,sum,sum] #7
                                             InputAdapter
                                               ReusedExchange [returns,profit_loss] #8
-                                      WholeStageCodegen (69)
+                                      WholeStageCodegen (63)
                                         Project [wp_web_page_sk,sales,returns,profit,profit_loss]
                                           BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk]
                                             HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum]
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt
index a74cb2cb75801..a97d333962f31 100644
--- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt
+++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt
@@ -1,13 +1,13 @@
 == Physical Plan ==
-TakeOrderedAndProject (98)
-+- * HashAggregate (97)
-   +- Exchange (96)
-      +- * HashAggregate (95)
-         +- Union (94)
-            :- * HashAggregate (83)
-            :  +- Exchange (82)
-            :     +- * HashAggregate (81)
-            :        +- Union (80)
+TakeOrderedAndProject (92)
++- * HashAggregate (91)
+   +- Exchange (90)
+      +- * HashAggregate (89)
+         +- Union (88)
+            :- * HashAggregate (77)
+            :  +- Exchange (76)
+            :     +- * HashAggregate (75)
+            :        +- Union (74)
             :           :- * Project (30)
             :           :  +- * BroadcastHashJoin LeftOuter BuildRight (29)
             :           :     :- * HashAggregate (15)
@@ -38,8 +38,8 @@ TakeOrderedAndProject (98)
             :           :                       :     :     +- Scan parquet spark_catalog.default.store_returns (16)
             :           :                       :     +- ReusedExchange (19)
             :           :                       +- ReusedExchange (22)
-            :           :- * Project (49)
-            :           :  +- * BroadcastNestedLoopJoin Inner BuildLeft (48)
+            :           :- * Project (43)
+            :           :  +- * BroadcastNestedLoopJoin Inner BuildLeft (42)
             :           :     :- BroadcastExchange (39)
             :           :     :  +- * HashAggregate (38)
             :           :     :     +- Exchange (37)
@@ -49,54 +49,48 @@ TakeOrderedAndProject (98)
             :           :     :                 :- * ColumnarToRow (32)
             :           :     :                 :  +- Scan parquet spark_catalog.default.catalog_sales (31)
             :           :     :                 +- ReusedExchange (33)
-            :           :     +- * HashAggregate (47)
-            :           :        +- Exchange (46)
-            :           :           +- * HashAggregate (45)
-            :           :              +- * Project (44)
-            :           :                 +- * BroadcastHashJoin Inner BuildRight (43)
-            :           :                    :- * ColumnarToRow (41)
-            :           :                    :  +- Scan parquet spark_catalog.default.catalog_returns (40)
-            :           :                    +- ReusedExchange (42)
-            :           +- * Project (79)
-            :              +- * BroadcastHashJoin LeftOuter BuildRight (78)
-            :                 :- * HashAggregate (64)
-            :                 :  +- Exchange (63)
-            :                 :     +- * HashAggregate (62)
-            :                 :        +- * Project (61)
-            :                 :           +- * BroadcastHashJoin Inner BuildRight (60)
-            :                 :              :- * Project (55)
-            :                 :              :  +- * BroadcastHashJoin Inner BuildRight (54)
-            :                 :              :     :- * Filter (52)
-            :                 :              :     :  +- * ColumnarToRow (51)
-            :                 :              :     :     +- Scan parquet spark_catalog.default.web_sales (50)
-            :                 :              :     +- ReusedExchange (53)
-            :                 :              +- BroadcastExchange (59)
-            :                 :                 +- * Filter (58)
-            :                 :                    +- * ColumnarToRow (57)
-            :                 :                       +- Scan parquet spark_catalog.default.web_page (56)
-            :                 +- BroadcastExchange (77)
-            :                    +- * HashAggregate (76)
-            :                       +- Exchange (75)
-            :                          +- * HashAggregate (74)
-            :                             +- * Project (73)
-            :                                +- * BroadcastHashJoin Inner BuildRight (72)
-            :                                   :- * Project (70)
-            :                                   :  +- * BroadcastHashJoin Inner BuildRight (69)
-            :                                   :     :- * Filter (67)
-            :                                   :     :  +- * ColumnarToRow (66)
-            :                                   :     :     +- Scan parquet spark_catalog.default.web_returns (65)
-            :                                   :     +- ReusedExchange (68)
-            :                                   +- ReusedExchange (71)
-            :- * HashAggregate (88)
-            :  +- Exchange (87)
-            :     +- * HashAggregate (86)
-            :        +- * HashAggregate (85)
-            :           +- ReusedExchange (84)
-            +- * HashAggregate (93)
-               +- Exchange (92)
-                  +- * HashAggregate (91)
-                     +- * HashAggregate (90)
-                        +- ReusedExchange (89)
+            :           :     +- * Project (41)
+            :           :        +- * Scan OneRowRelation (40)
+            :           +- * Project (73)
+            :              +- * BroadcastHashJoin LeftOuter BuildRight (72)
+            :                 :- * HashAggregate (58)
+            :                 :  +- Exchange (57)
+            :                 :     +- * HashAggregate (56)
+            :                 :        +- * Project (55)
+            :                 :           +- * BroadcastHashJoin Inner BuildRight (54)
+            :                 :              :- * Project (49)
+            :                 :              :  +- * BroadcastHashJoin Inner BuildRight (48)
+            :                 :              :     :- * Filter (46)
+            :                 :              :     :  +- * ColumnarToRow (45)
+            :                 :              :     :     +- Scan parquet spark_catalog.default.web_sales (44)
+            :                 :              :     +- ReusedExchange (47)
+            :                 :              +- BroadcastExchange (53)
+            :                 :                 +- * Filter (52)
+            :                 :                    +- * ColumnarToRow (51)
+            :                 :                       +- Scan parquet spark_catalog.default.web_page (50)
+            :                 +- BroadcastExchange (71)
+            :                    +- * HashAggregate (70)
+            :                       +- Exchange (69)
+            :                          +- * HashAggregate (68)
+            :                             +- * Project (67)
+            :                                +- * BroadcastHashJoin Inner BuildRight (66)
+            :                                   :- * Project (64)
+            :                                   :  +- * BroadcastHashJoin Inner BuildRight (63)
+            :                                   :     :- * Filter (61)
+            :                                   :     :  +- * ColumnarToRow (60)
+            :                                   :     :     +- Scan parquet spark_catalog.default.web_returns (59)
+            :                                   :     +- ReusedExchange (62)
+            :                                   +- ReusedExchange (65)
+            :- * HashAggregate (82)
+            :  +- Exchange (81)
+            :     +- * HashAggregate (80)
+            :        +- * HashAggregate (79)
+            :           +- ReusedExchange (78)
+            +- * HashAggregate (87)
+               +- Exchange (86)
+                  +- * HashAggregate (85)
+                     +- * HashAggregate (84)
+                        +- ReusedExchange (83)
 
 
 (1) Scan parquet spark_catalog.default.store_sales
@@ -114,7 +108,7 @@ Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_s
 Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4]
 Condition : isnotnull(ss_store_sk#1)
 
-(4) ReusedExchange [Reuses operator id: 103]
+(4) ReusedExchange [Reuses operator id: 97]
 Output [1]: [d_date_sk#6]
 
 (5) BroadcastHashJoin [codegen id : 3]
@@ -188,7 +182,7 @@ Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_s
 Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19]
 Condition : isnotnull(sr_store_sk#16)
 
-(19) ReusedExchange [Reuses operator id: 103]
+(19) ReusedExchange [Reuses operator id: 97]
 Output [1]: [d_date_sk#20]
 
 (20) BroadcastHashJoin [codegen id : 6]
@@ -256,7 +250,7 @@ ReadSchema: struct<cs_call_center_sk:int,cs_ext_sales_price:decimal(7,2),cs_net_
 (32) ColumnarToRow [codegen id : 10]
 Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37]
 
-(33) ReusedExchange [Reuses operator id: 103]
+(33) ReusedExchange [Reuses operator id: 97]
 Output [1]: [d_date_sk#38]
 
 (34) BroadcastHashJoin [codegen id : 10]
@@ -291,331 +285,297 @@ Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_p
 Input [3]: [cs_call_center_sk#34, sales#45, profit#46]
 Arguments: IdentityBroadcastMode, [plan_id=6]
 
-(40) Scan parquet spark_catalog.default.catalog_returns
-Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49]
-Batched: true
-Location: InMemoryFileIndex []
-PartitionFilters: [isnotnull(cr_returned_date_sk#49), dynamicpruningexpression(cr_returned_date_sk#49 IN dynamicpruning#5)]
-ReadSchema: struct<cr_return_amount:decimal(7,2),cr_net_loss:decimal(7,2)>
-
-(41) ColumnarToRow [codegen id : 13]
-Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49]
-
-(42) ReusedExchange [Reuses operator id: 103]
-Output [1]: [d_date_sk#50]
-
-(43) BroadcastHashJoin [codegen id : 13]
-Left keys [1]: [cr_returned_date_sk#49]
-Right keys [1]: [d_date_sk#50]
-Join type: Inner
-Join condition: None
-
-(44) Project [codegen id : 13]
-Output [2]: [cr_return_amount#47, cr_net_loss#48]
-Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50]
+(40) Scan OneRowRelation
+Output: []
 
-(45) HashAggregate [codegen id : 13]
-Input [2]: [cr_return_amount#47, cr_net_loss#48]
-Keys: []
-Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))]
-Aggregate Attributes [2]: [sum#51, sum#52]
-Results [2]: [sum#53, sum#54]
-
-(46) Exchange
-Input [2]: [sum#53, sum#54]
-Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7]
+(41) Project
+Output [2]: [Subquery scalar-subquery#47, [id=#7].returns AS returns#48, ReusedSubquery Subquery scalar-subquery#47, [id=#7].profit_loss AS profit_loss#49]
+Input: []
 
-(47) HashAggregate
-Input [2]: [sum#53, sum#54]
-Keys: []
-Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))]
-Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56]
-Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58]
-
-(48) BroadcastNestedLoopJoin [codegen id : 14]
+(42) BroadcastNestedLoopJoin [codegen id : 12]
 Join type: Inner
 Join condition: None
 
-(49) Project [codegen id : 14]
-Output [5]: [catalog channel AS channel#59, cs_call_center_sk#34 AS id#60, sales#45, returns#57, (profit#46 - profit_loss#58) AS profit#61]
-Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58]
+(43) Project [codegen id : 12]
+Output [5]: [catalog channel AS channel#50, cs_call_center_sk#34 AS id#51, sales#45, returns#48, (profit#46 - profit_loss#49) AS profit#52]
+Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#48, profit_loss#49]
 
-(50) Scan parquet spark_catalog.default.web_sales
-Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65]
+(44) Scan parquet spark_catalog.default.web_sales
+Output [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56]
 Batched: true
 Location: InMemoryFileIndex []
-PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#5)]
+PartitionFilters: [isnotnull(ws_sold_date_sk#56), dynamicpruningexpression(ws_sold_date_sk#56 IN dynamicpruning#5)]
 PushedFilters: [IsNotNull(ws_web_page_sk)]
 ReadSchema: struct<ws_web_page_sk:int,ws_ext_sales_price:decimal(7,2),ws_net_profit:decimal(7,2)>
 
-(51) ColumnarToRow [codegen id : 17]
-Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65]
+(45) ColumnarToRow [codegen id : 15]
+Input [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56]
 
-(52) Filter [codegen id : 17]
-Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65]
-Condition : isnotnull(ws_web_page_sk#62)
+(46) Filter [codegen id : 15]
+Input [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56]
+Condition : isnotnull(ws_web_page_sk#53)
 
-(53) ReusedExchange [Reuses operator id: 103]
-Output [1]: [d_date_sk#66]
+(47) ReusedExchange [Reuses operator id: 97]
+Output [1]: [d_date_sk#57]
 
-(54) BroadcastHashJoin [codegen id : 17]
-Left keys [1]: [ws_sold_date_sk#65]
-Right keys [1]: [d_date_sk#66]
+(48) BroadcastHashJoin [codegen id : 15]
+Left keys [1]: [ws_sold_date_sk#56]
+Right keys [1]: [d_date_sk#57]
 Join type: Inner
 Join condition: None
 
-(55) Project [codegen id : 17]
-Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64]
-Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66]
+(49) Project [codegen id : 15]
+Output [3]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55]
+Input [5]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, ws_sold_date_sk#56, d_date_sk#57]
 
-(56) Scan parquet spark_catalog.default.web_page
-Output [1]: [wp_web_page_sk#67]
+(50) Scan parquet spark_catalog.default.web_page
+Output [1]: [wp_web_page_sk#58]
 Batched: true
 Location [not included in comparison]/{warehouse_dir}/web_page]
 PushedFilters: [IsNotNull(wp_web_page_sk)]
 ReadSchema: struct<wp_web_page_sk:int>
 
-(57) ColumnarToRow [codegen id : 16]
-Input [1]: [wp_web_page_sk#67]
+(51) ColumnarToRow [codegen id : 14]
+Input [1]: [wp_web_page_sk#58]
 
-(58) Filter [codegen id : 16]
-Input [1]: [wp_web_page_sk#67]
-Condition : isnotnull(wp_web_page_sk#67)
+(52) Filter [codegen id : 14]
+Input [1]: [wp_web_page_sk#58]
+Condition : isnotnull(wp_web_page_sk#58)
 
-(59) BroadcastExchange
-Input [1]: [wp_web_page_sk#67]
+(53) BroadcastExchange
+Input [1]: [wp_web_page_sk#58]
 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8]
 
-(60) BroadcastHashJoin [codegen id : 17]
-Left keys [1]: [ws_web_page_sk#62]
-Right keys [1]: [wp_web_page_sk#67]
+(54) BroadcastHashJoin [codegen id : 15]
+Left keys [1]: [ws_web_page_sk#53]
+Right keys [1]: [wp_web_page_sk#58]
 Join type: Inner
 Join condition: None
 
-(61) Project [codegen id : 17]
-Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67]
-Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67]
-
-(62) HashAggregate [codegen id : 17]
-Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67]
-Keys [1]: [wp_web_page_sk#67]
-Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))]
-Aggregate Attributes [2]: [sum#68, sum#69]
-Results [3]: [wp_web_page_sk#67, sum#70, sum#71]
-
-(63) Exchange
-Input [3]: [wp_web_page_sk#67, sum#70, sum#71]
-Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=9]
-
-(64) HashAggregate [codegen id : 22]
-Input [3]: [wp_web_page_sk#67, sum#70, sum#71]
-Keys [1]: [wp_web_page_sk#67]
-Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))]
-Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73]
-Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75]
-
-(65) Scan parquet spark_catalog.default.web_returns
-Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79]
+(55) Project [codegen id : 15]
+Output [3]: [ws_ext_sales_price#54, ws_net_profit#55, wp_web_page_sk#58]
+Input [4]: [ws_web_page_sk#53, ws_ext_sales_price#54, ws_net_profit#55, wp_web_page_sk#58]
+
+(56) HashAggregate [codegen id : 15]
+Input [3]: [ws_ext_sales_price#54, ws_net_profit#55, wp_web_page_sk#58]
+Keys [1]: [wp_web_page_sk#58]
+Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#54)), partial_sum(UnscaledValue(ws_net_profit#55))]
+Aggregate Attributes [2]: [sum#59, sum#60]
+Results [3]: [wp_web_page_sk#58, sum#61, sum#62]
+
+(57) Exchange
+Input [3]: [wp_web_page_sk#58, sum#61, sum#62]
+Arguments: hashpartitioning(wp_web_page_sk#58, 5), ENSURE_REQUIREMENTS, [plan_id=9]
+
+(58) HashAggregate [codegen id : 20]
+Input [3]: [wp_web_page_sk#58, sum#61, sum#62]
+Keys [1]: [wp_web_page_sk#58]
+Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#54)), sum(UnscaledValue(ws_net_profit#55))]
+Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#54))#63, sum(UnscaledValue(ws_net_profit#55))#64]
+Results [3]: [wp_web_page_sk#58, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#54))#63,17,2) AS sales#65, MakeDecimal(sum(UnscaledValue(ws_net_profit#55))#64,17,2) AS profit#66]
+
+(59) Scan parquet spark_catalog.default.web_returns
+Output [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70]
 Batched: true
 Location: InMemoryFileIndex []
-PartitionFilters: [isnotnull(wr_returned_date_sk#79), dynamicpruningexpression(wr_returned_date_sk#79 IN dynamicpruning#5)]
+PartitionFilters: [isnotnull(wr_returned_date_sk#70), dynamicpruningexpression(wr_returned_date_sk#70 IN dynamicpruning#5)]
 PushedFilters: [IsNotNull(wr_web_page_sk)]
 ReadSchema: struct<wr_web_page_sk:int,wr_return_amt:decimal(7,2),wr_net_loss:decimal(7,2)>
 
-(66) ColumnarToRow [codegen id : 20]
-Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79]
+(60) ColumnarToRow [codegen id : 18]
+Input [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70]
 
-(67) Filter [codegen id : 20]
-Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79]
-Condition : isnotnull(wr_web_page_sk#76)
+(61) Filter [codegen id : 18]
+Input [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70]
+Condition : isnotnull(wr_web_page_sk#67)
 
-(68) ReusedExchange [Reuses operator id: 103]
-Output [1]: [d_date_sk#80]
+(62) ReusedExchange [Reuses operator id: 97]
+Output [1]: [d_date_sk#71]
 
-(69) BroadcastHashJoin [codegen id : 20]
-Left keys [1]: [wr_returned_date_sk#79]
-Right keys [1]: [d_date_sk#80]
+(63) BroadcastHashJoin [codegen id : 18]
+Left keys [1]: [wr_returned_date_sk#70]
+Right keys [1]: [d_date_sk#71]
 Join type: Inner
 Join condition: None
 
-(70) Project [codegen id : 20]
-Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78]
-Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80]
+(64) Project [codegen id : 18]
+Output [3]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69]
+Input [5]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70, d_date_sk#71]
 
-(71) ReusedExchange [Reuses operator id: 59]
-Output [1]: [wp_web_page_sk#81]
+(65) ReusedExchange [Reuses operator id: 53]
+Output [1]: [wp_web_page_sk#72]
 
-(72) BroadcastHashJoin [codegen id : 20]
-Left keys [1]: [wr_web_page_sk#76]
-Right keys [1]: [wp_web_page_sk#81]
+(66) BroadcastHashJoin [codegen id : 18]
+Left keys [1]: [wr_web_page_sk#67]
+Right keys [1]: [wp_web_page_sk#72]
 Join type: Inner
 Join condition: None
 
-(73) Project [codegen id : 20]
-Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81]
-Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81]
-
-(74) HashAggregate [codegen id : 20]
-Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81]
-Keys [1]: [wp_web_page_sk#81]
-Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))]
-Aggregate Attributes [2]: [sum#82, sum#83]
-Results [3]: [wp_web_page_sk#81, sum#84, sum#85]
-
-(75) Exchange
-Input [3]: [wp_web_page_sk#81, sum#84, sum#85]
-Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=10]
-
-(76) HashAggregate [codegen id : 21]
-Input [3]: [wp_web_page_sk#81, sum#84, sum#85]
-Keys [1]: [wp_web_page_sk#81]
-Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))]
-Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87]
-Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89]
-
-(77) BroadcastExchange
-Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89]
+(67) Project [codegen id : 18]
+Output [3]: [wr_return_amt#68, wr_net_loss#69, wp_web_page_sk#72]
+Input [4]: [wr_web_page_sk#67, wr_return_amt#68, wr_net_loss#69, wp_web_page_sk#72]
+
+(68) HashAggregate [codegen id : 18]
+Input [3]: [wr_return_amt#68, wr_net_loss#69, wp_web_page_sk#72]
+Keys [1]: [wp_web_page_sk#72]
+Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#68)), partial_sum(UnscaledValue(wr_net_loss#69))]
+Aggregate Attributes [2]: [sum#73, sum#74]
+Results [3]: [wp_web_page_sk#72, sum#75, sum#76]
+
+(69) Exchange
+Input [3]: [wp_web_page_sk#72, sum#75, sum#76]
+Arguments: hashpartitioning(wp_web_page_sk#72, 5), ENSURE_REQUIREMENTS, [plan_id=10]
+
+(70) HashAggregate [codegen id : 19]
+Input [3]: [wp_web_page_sk#72, sum#75, sum#76]
+Keys [1]: [wp_web_page_sk#72]
+Functions [2]: [sum(UnscaledValue(wr_return_amt#68)), sum(UnscaledValue(wr_net_loss#69))]
+Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#68))#77, sum(UnscaledValue(wr_net_loss#69))#78]
+Results [3]: [wp_web_page_sk#72, MakeDecimal(sum(UnscaledValue(wr_return_amt#68))#77,17,2) AS returns#79, MakeDecimal(sum(UnscaledValue(wr_net_loss#69))#78,17,2) AS profit_loss#80]
+
+(71) BroadcastExchange
+Input [3]: [wp_web_page_sk#72, returns#79, profit_loss#80]
 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11]
 
-(78) BroadcastHashJoin [codegen id : 22]
-Left keys [1]: [wp_web_page_sk#67]
-Right keys [1]: [wp_web_page_sk#81]
+(72) BroadcastHashJoin [codegen id : 20]
+Left keys [1]: [wp_web_page_sk#58]
+Right keys [1]: [wp_web_page_sk#72]
 Join type: LeftOuter
 Join condition: None
 
-(79) Project [codegen id : 22]
-Output [5]: [web channel AS channel#90, wp_web_page_sk#67 AS id#91, sales#74, coalesce(returns#88, 0.00) AS returns#92, (profit#75 - coalesce(profit_loss#89, 0.00)) AS profit#93]
-Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89]
+(73) Project [codegen id : 20]
+Output [5]: [web channel AS channel#81, wp_web_page_sk#58 AS id#82, sales#65, coalesce(returns#79, 0.00) AS returns#83, (profit#66 - coalesce(profit_loss#80, 0.00)) AS profit#84]
+Input [6]: [wp_web_page_sk#58, sales#65, profit#66, wp_web_page_sk#72, returns#79, profit_loss#80]
 
-(80) Union
+(74) Union
 
-(81) HashAggregate [codegen id : 23]
+(75) HashAggregate [codegen id : 21]
 Input [5]: [channel#30, id#31, sales#14, returns#32, profit#33]
 Keys [2]: [channel#30, id#31]
 Functions [3]: [partial_sum(sales#14), partial_sum(returns#32), partial_sum(profit#33)]
-Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99]
-Results [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105]
+Aggregate Attributes [6]: [sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90]
+Results [8]: [channel#30, id#31, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96]
 
-(82) Exchange
-Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105]
+(76) Exchange
+Input [8]: [channel#30, id#31, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96]
 Arguments: hashpartitioning(channel#30, id#31, 5), ENSURE_REQUIREMENTS, [plan_id=12]
 
-(83) HashAggregate [codegen id : 24]
-Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105]
+(77) HashAggregate [codegen id : 22]
+Input [8]: [channel#30, id#31, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96]
 Keys [2]: [channel#30, id#31]
 Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)]
-Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108]
-Results [5]: [channel#30, id#31, cast(sum(sales#14)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(37,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111]
-
-(84) ReusedExchange [Reuses operator id: 82]
-Output [8]: [channel#112, id#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119]
-
-(85) HashAggregate [codegen id : 48]
-Input [8]: [channel#112, id#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119]
-Keys [2]: [channel#112, id#113]
-Functions [3]: [sum(sales#120), sum(returns#121), sum(profit#122)]
-Aggregate Attributes [3]: [sum(sales#120)#106, sum(returns#121)#107, sum(profit#122)#108]
-Results [4]: [channel#112, sum(sales#120)#106 AS sales#123, sum(returns#121)#107 AS returns#124, sum(profit#122)#108 AS profit#125]
-
-(86) HashAggregate [codegen id : 48]
-Input [4]: [channel#112, sales#123, returns#124, profit#125]
-Keys [1]: [channel#112]
-Functions [3]: [partial_sum(sales#123), partial_sum(returns#124), partial_sum(profit#125)]
-Aggregate Attributes [6]: [sum#126, isEmpty#127, sum#128, isEmpty#129, sum#130, isEmpty#131]
-Results [7]: [channel#112, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137]
-
-(87) Exchange
-Input [7]: [channel#112, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137]
-Arguments: hashpartitioning(channel#112, 5), ENSURE_REQUIREMENTS, [plan_id=13]
-
-(88) HashAggregate [codegen id : 49]
-Input [7]: [channel#112, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137]
-Keys [1]: [channel#112]
-Functions [3]: [sum(sales#123), sum(returns#124), sum(profit#125)]
-Aggregate Attributes [3]: [sum(sales#123)#138, sum(returns#124)#139, sum(profit#125)#140]
-Results [5]: [channel#112, null AS id#141, sum(sales#123)#138 AS sales#142, sum(returns#124)#139 AS returns#143, sum(profit#125)#140 AS profit#144]
-
-(89) ReusedExchange [Reuses operator id: 82]
-Output [8]: [channel#145, id#146, sum#147, isEmpty#148, sum#149, isEmpty#150, sum#151, isEmpty#152]
-
-(90) HashAggregate [codegen id : 73]
-Input [8]: [channel#145, id#146, sum#147, isEmpty#148, sum#149, isEmpty#150, sum#151, isEmpty#152]
-Keys [2]: [channel#145, id#146]
-Functions [3]: [sum(sales#153), sum(returns#154), sum(profit#155)]
-Aggregate Attributes [3]: [sum(sales#153)#106, sum(returns#154)#107, sum(profit#155)#108]
-Results [3]: [sum(sales#153)#106 AS sales#156, sum(returns#154)#107 AS returns#157, sum(profit#155)#108 AS profit#158]
-
-(91) HashAggregate [codegen id : 73]
-Input [3]: [sales#156, returns#157, profit#158]
+Aggregate Attributes [3]: [sum(sales#14)#97, sum(returns#32)#98, sum(profit#33)#99]
+Results [5]: [channel#30, id#31, cast(sum(sales#14)#97 as decimal(37,2)) AS sales#100, cast(sum(returns#32)#98 as decimal(37,2)) AS returns#101, cast(sum(profit#33)#99 as decimal(38,2)) AS profit#102]
+
+(78) ReusedExchange [Reuses operator id: 76]
+Output [8]: [channel#103, id#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110]
+
+(79) HashAggregate [codegen id : 44]
+Input [8]: [channel#103, id#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110]
+Keys [2]: [channel#103, id#104]
+Functions [3]: [sum(sales#111), sum(returns#112), sum(profit#113)]
+Aggregate Attributes [3]: [sum(sales#111)#97, sum(returns#112)#98, sum(profit#113)#99]
+Results [4]: [channel#103, sum(sales#111)#97 AS sales#114, sum(returns#112)#98 AS returns#115, sum(profit#113)#99 AS profit#116]
+
+(80) HashAggregate [codegen id : 44]
+Input [4]: [channel#103, sales#114, returns#115, profit#116]
+Keys [1]: [channel#103]
+Functions [3]: [partial_sum(sales#114), partial_sum(returns#115), partial_sum(profit#116)]
+Aggregate Attributes [6]: [sum#117, isEmpty#118, sum#119, isEmpty#120, sum#121, isEmpty#122]
+Results [7]: [channel#103, sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128]
+
+(81) Exchange
+Input [7]: [channel#103, sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128]
+Arguments: hashpartitioning(channel#103, 5), ENSURE_REQUIREMENTS, [plan_id=13]
+
+(82) HashAggregate [codegen id : 45]
+Input [7]: [channel#103, sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128]
+Keys [1]: [channel#103]
+Functions [3]: [sum(sales#114), sum(returns#115), sum(profit#116)]
+Aggregate Attributes [3]: [sum(sales#114)#129, sum(returns#115)#130, sum(profit#116)#131]
+Results [5]: [channel#103, null AS id#132, sum(sales#114)#129 AS sales#133, sum(returns#115)#130 AS returns#134, sum(profit#116)#131 AS profit#135]
+
+(83) ReusedExchange [Reuses operator id: 76]
+Output [8]: [channel#136, id#137, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143]
+
+(84) HashAggregate [codegen id : 67]
+Input [8]: [channel#136, id#137, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143]
+Keys [2]: [channel#136, id#137]
+Functions [3]: [sum(sales#144), sum(returns#145), sum(profit#146)]
+Aggregate Attributes [3]: [sum(sales#144)#97, sum(returns#145)#98, sum(profit#146)#99]
+Results [3]: [sum(sales#144)#97 AS sales#147, sum(returns#145)#98 AS returns#148, sum(profit#146)#99 AS profit#149]
+
+(85) HashAggregate [codegen id : 67]
+Input [3]: [sales#147, returns#148, profit#149]
 Keys: []
-Functions [3]: [partial_sum(sales#156), partial_sum(returns#157), partial_sum(profit#158)]
-Aggregate Attributes [6]: [sum#159, isEmpty#160, sum#161, isEmpty#162, sum#163, isEmpty#164]
-Results [6]: [sum#165, isEmpty#166, sum#167, isEmpty#168, sum#169, isEmpty#170]
+Functions [3]: [partial_sum(sales#147), partial_sum(returns#148), partial_sum(profit#149)]
+Aggregate Attributes [6]: [sum#150, isEmpty#151, sum#152, isEmpty#153, sum#154, isEmpty#155]
+Results [6]: [sum#156, isEmpty#157, sum#158, isEmpty#159, sum#160, isEmpty#161]
 
-(92) Exchange
-Input [6]: [sum#165, isEmpty#166, sum#167, isEmpty#168, sum#169, isEmpty#170]
+(86) Exchange
+Input [6]: [sum#156, isEmpty#157, sum#158, isEmpty#159, sum#160, isEmpty#161]
 Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=14]
 
-(93) HashAggregate [codegen id : 74]
-Input [6]: [sum#165, isEmpty#166, sum#167, isEmpty#168, sum#169, isEmpty#170]
+(87) HashAggregate [codegen id : 68]
+Input [6]: [sum#156, isEmpty#157, sum#158, isEmpty#159, sum#160, isEmpty#161]
 Keys: []
-Functions [3]: [sum(sales#156), sum(returns#157), sum(profit#158)]
-Aggregate Attributes [3]: [sum(sales#156)#171, sum(returns#157)#172, sum(profit#158)#173]
-Results [5]: [null AS channel#174, null AS id#175, sum(sales#156)#171 AS sales#176, sum(returns#157)#172 AS returns#177, sum(profit#158)#173 AS profit#178]
+Functions [3]: [sum(sales#147), sum(returns#148), sum(profit#149)]
+Aggregate Attributes [3]: [sum(sales#147)#162, sum(returns#148)#163, sum(profit#149)#164]
+Results [5]: [null AS channel#165, null AS id#166, sum(sales#147)#162 AS sales#167, sum(returns#148)#163 AS returns#168, sum(profit#149)#164 AS profit#169]
 
-(94) Union
+(88) Union
 
-(95) HashAggregate [codegen id : 75]
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+(89) HashAggregate [codegen id : 69]
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Keys [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 Functions: []
 Aggregate Attributes: []
-Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+Results [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 
-(96) Exchange
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Arguments: hashpartitioning(channel#30, id#31, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=15]
+(90) Exchange
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Arguments: hashpartitioning(channel#30, id#31, sales#100, returns#101, profit#102, 5), ENSURE_REQUIREMENTS, [plan_id=15]
 
-(97) HashAggregate [codegen id : 76]
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+(91) HashAggregate [codegen id : 70]
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Keys [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 Functions: []
 Aggregate Attributes: []
-Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
+Results [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
 
-(98) TakeOrderedAndProject
-Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111]
-Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#109, returns#110, profit#111]
+(92) TakeOrderedAndProject
+Input [5]: [channel#30, id#31, sales#100, returns#101, profit#102]
+Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#100, returns#101, profit#102]
 
 ===== Subqueries =====
 
 Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5
-BroadcastExchange (103)
-+- * Project (102)
-   +- * Filter (101)
-      +- * ColumnarToRow (100)
-         +- Scan parquet spark_catalog.default.date_dim (99)
+BroadcastExchange (97)
++- * Project (96)
+   +- * Filter (95)
+      +- * ColumnarToRow (94)
+         +- Scan parquet spark_catalog.default.date_dim (93)
 
 
-(99) Scan parquet spark_catalog.default.date_dim
-Output [2]: [d_date_sk#6, d_date#179]
+(93) Scan parquet spark_catalog.default.date_dim
+Output [2]: [d_date_sk#6, d_date#170]
 Batched: true
 Location [not included in comparison]/{warehouse_dir}/date_dim]
 PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)]
 ReadSchema: struct<d_date_sk:int,d_date:date>
 
-(100) ColumnarToRow [codegen id : 1]
-Input [2]: [d_date_sk#6, d_date#179]
+(94) ColumnarToRow [codegen id : 1]
+Input [2]: [d_date_sk#6, d_date#170]
 
-(101) Filter [codegen id : 1]
-Input [2]: [d_date_sk#6, d_date#179]
-Condition : (((isnotnull(d_date#179) AND (d_date#179 >= 1998-08-04)) AND (d_date#179 <= 1998-09-03)) AND isnotnull(d_date_sk#6))
+(95) Filter [codegen id : 1]
+Input [2]: [d_date_sk#6, d_date#170]
+Condition : (((isnotnull(d_date#170) AND (d_date#170 >= 1998-08-04)) AND (d_date#170 <= 1998-09-03)) AND isnotnull(d_date_sk#6))
 
-(102) Project [codegen id : 1]
+(96) Project [codegen id : 1]
 Output [1]: [d_date_sk#6]
-Input [2]: [d_date_sk#6, d_date#179]
+Input [2]: [d_date_sk#6, d_date#170]
 
-(103) BroadcastExchange
+(97) BroadcastExchange
 Input [1]: [d_date_sk#6]
 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16]
 
@@ -623,10 +583,69 @@ Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#19
 
 Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#37 IN dynamicpruning#5
 
-Subquery:4 Hosting operator id = 40 Hosting Expression = cr_returned_date_sk#49 IN dynamicpruning#5
+Subquery:4 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#47, [id=#7]
+* Project (106)
++- * HashAggregate (105)
+   +- Exchange (104)
+      +- * HashAggregate (103)
+         +- * Project (102)
+            +- * BroadcastHashJoin Inner BuildRight (101)
+               :- * ColumnarToRow (99)
+               :  +- Scan parquet spark_catalog.default.catalog_returns (98)
+               +- ReusedExchange (100)
+
+
+(98) Scan parquet spark_catalog.default.catalog_returns
+Output [3]: [cr_return_amount#171, cr_net_loss#172, cr_returned_date_sk#173]
+Batched: true
+Location: InMemoryFileIndex []
+PartitionFilters: [isnotnull(cr_returned_date_sk#173), dynamicpruningexpression(cr_returned_date_sk#173 IN dynamicpruning#5)]
+ReadSchema: struct<cr_return_amount:decimal(7,2),cr_net_loss:decimal(7,2)>
+
+(99) ColumnarToRow [codegen id : 2]
+Input [3]: [cr_return_amount#171, cr_net_loss#172, cr_returned_date_sk#173]
+
+(100) ReusedExchange [Reuses operator id: 97]
+Output [1]: [d_date_sk#174]
+
+(101) BroadcastHashJoin [codegen id : 2]
+Left keys [1]: [cr_returned_date_sk#173]
+Right keys [1]: [d_date_sk#174]
+Join type: Inner
+Join condition: None
+
+(102) Project [codegen id : 2]
+Output [2]: [cr_return_amount#171, cr_net_loss#172]
+Input [4]: [cr_return_amount#171, cr_net_loss#172, cr_returned_date_sk#173, d_date_sk#174]
+
+(103) HashAggregate [codegen id : 2]
+Input [2]: [cr_return_amount#171, cr_net_loss#172]
+Keys: []
+Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#171)), partial_sum(UnscaledValue(cr_net_loss#172))]
+Aggregate Attributes [2]: [sum#175, sum#176]
+Results [2]: [sum#177, sum#178]
+
+(104) Exchange
+Input [2]: [sum#177, sum#178]
+Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=17]
+
+(105) HashAggregate [codegen id : 3]
+Input [2]: [sum#177, sum#178]
+Keys: []
+Functions [2]: [sum(UnscaledValue(cr_return_amount#171)), sum(UnscaledValue(cr_net_loss#172))]
+Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#171))#179, sum(UnscaledValue(cr_net_loss#172))#180]
+Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#171))#179,17,2) AS returns#48, MakeDecimal(sum(UnscaledValue(cr_net_loss#172))#180,17,2) AS profit_loss#49]
+
+(106) Project [codegen id : 3]
+Output [1]: [named_struct(returns, returns#48, profit_loss, profit_loss#49) AS mergedValue#181]
+Input [2]: [returns#48, profit_loss#49]
+
+Subquery:5 Hosting operator id = 98 Hosting Expression = cr_returned_date_sk#173 IN dynamicpruning#5
+
+Subquery:6 Hosting operator id = 41 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#7]
 
-Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#5
+Subquery:7 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#56 IN dynamicpruning#5
 
-Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#79 IN dynamicpruning#5
+Subquery:8 Hosting operator id = 59 Hosting Expression = wr_returned_date_sk#70 IN dynamicpruning#5
 
 
diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt
index e7eb75e97c5f8..74f3b1090676e 100644
--- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt
+++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt
@@ -1,17 +1,17 @@
 TakeOrderedAndProject [channel,id,sales,returns,profit]
-  WholeStageCodegen (76)
+  WholeStageCodegen (70)
     HashAggregate [channel,id,sales,returns,profit]
       InputAdapter
         Exchange [channel,id,sales,returns,profit] #1
-          WholeStageCodegen (75)
+          WholeStageCodegen (69)
             HashAggregate [channel,id,sales,returns,profit]
               InputAdapter
                 Union
-                  WholeStageCodegen (24)
+                  WholeStageCodegen (22)
                     HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                       InputAdapter
                         Exchange [channel,id] #2
-                          WholeStageCodegen (23)
+                          WholeStageCodegen (21)
                             HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                               InputAdapter
                                 Union
@@ -69,7 +69,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                                   ReusedExchange [d_date_sk] #4
                                                             InputAdapter
                                                               ReusedExchange [s_store_sk] #5
-                                  WholeStageCodegen (14)
+                                  WholeStageCodegen (12)
                                     Project [cs_call_center_sk,sales,returns,profit,profit_loss]
                                       BroadcastNestedLoopJoin
                                         InputAdapter
@@ -88,26 +88,32 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                                   ReusedSubquery [d_date_sk] #1
                                                             InputAdapter
                                                               ReusedExchange [d_date_sk] #4
-                                        HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum]
-                                          InputAdapter
-                                            Exchange #10
-                                              WholeStageCodegen (13)
-                                                HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum]
-                                                  Project [cr_return_amount,cr_net_loss]
-                                                    BroadcastHashJoin [cr_returned_date_sk,d_date_sk]
-                                                      ColumnarToRow
-                                                        InputAdapter
-                                                          Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk]
-                                                            ReusedSubquery [d_date_sk] #1
-                                                      InputAdapter
-                                                        ReusedExchange [d_date_sk] #4
-                                  WholeStageCodegen (22)
+                                        Project
+                                          Subquery #2
+                                            WholeStageCodegen (3)
+                                              Project [returns,profit_loss]
+                                                HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum]
+                                                  InputAdapter
+                                                    Exchange #10
+                                                      WholeStageCodegen (2)
+                                                        HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum]
+                                                          Project [cr_return_amount,cr_net_loss]
+                                                            BroadcastHashJoin [cr_returned_date_sk,d_date_sk]
+                                                              ColumnarToRow
+                                                                InputAdapter
+                                                                  Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk]
+                                                                    ReusedSubquery [d_date_sk] #1
+                                                              InputAdapter
+                                                                ReusedExchange [d_date_sk] #4
+                                          ReusedSubquery [mergedValue] #2
+                                          Scan OneRowRelation
+                                  WholeStageCodegen (20)
                                     Project [wp_web_page_sk,sales,returns,profit,profit_loss]
                                       BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk]
                                         HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum]
                                           InputAdapter
                                             Exchange [wp_web_page_sk] #11
-                                              WholeStageCodegen (17)
+                                              WholeStageCodegen (15)
                                                 HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum]
                                                   Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk]
                                                     BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk]
@@ -122,18 +128,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                             ReusedExchange [d_date_sk] #4
                                                       InputAdapter
                                                         BroadcastExchange #12
-                                                          WholeStageCodegen (16)
+                                                          WholeStageCodegen (14)
                                                             Filter [wp_web_page_sk]
                                                               ColumnarToRow
                                                                 InputAdapter
                                                                   Scan parquet spark_catalog.default.web_page [wp_web_page_sk]
                                         InputAdapter
                                           BroadcastExchange #13
-                                            WholeStageCodegen (21)
+                                            WholeStageCodegen (19)
                                               HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum]
                                                 InputAdapter
                                                   Exchange [wp_web_page_sk] #14
-                                                    WholeStageCodegen (20)
+                                                    WholeStageCodegen (18)
                                                       HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum]
                                                         Project [wr_return_amt,wr_net_loss,wp_web_page_sk]
                                                           BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk]
@@ -148,20 +154,20 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
                                                                   ReusedExchange [d_date_sk] #4
                                                             InputAdapter
                                                               ReusedExchange [wp_web_page_sk] #12
-                  WholeStageCodegen (49)
+                  WholeStageCodegen (45)
                     HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                       InputAdapter
                         Exchange [channel] #15
-                          WholeStageCodegen (48)
+                          WholeStageCodegen (44)
                             HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                               HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                                 InputAdapter
                                   ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2
-                  WholeStageCodegen (74)
+                  WholeStageCodegen (68)
                     HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                       InputAdapter
                         Exchange #16
-                          WholeStageCodegen (73)
+                          WholeStageCodegen (67)
                             HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                               HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty]
                                 InputAdapter
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala
index c0f1d7ebaa05b..5612c831c37a9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala
@@ -17,11 +17,14 @@
 
 package org.apache.spark.sql
 
+import java.util.Locale
+
 import org.scalactic.source.Position
 import org.scalatest.Tag
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.AnalysisContext
 import org.apache.spark.sql.catalyst.analysis.SQLScalarFunction
 import org.apache.spark.sql.catalyst.catalog.{
   CatalogStorageFormat,
@@ -31,6 +34,7 @@ import org.apache.spark.sql.catalyst.catalog.{
 }
 import org.apache.spark.sql.catalyst.expressions.Alias
 import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project, View}
+import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.StructType
@@ -169,6 +173,106 @@ class AlwaysPersistedConfigsSuite extends SharedSparkSession {
     assert(sqlConf.settings.get("spark.sql.ansi.enabled") == "false")
   }
 
+  test("Current schema marker is materialized in persisted view path") {
+    withView(testViewName) {
+      withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+        sql("CREATE DATABASE IF NOT EXISTS path_materialized_view")
+        try {
+          sql("USE path_materialized_view")
+          sql("SET PATH = current_schema, system.builtin")
+          sql(s"CREATE VIEW $testViewName AS SELECT 1")
+          val metadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier(testViewName))
+          val storedPath = metadata.viewStoredResolutionPath.getOrElse(
+            fail("Expected persisted view resolution path to be set"))
+          val parsed = CatalogManager.deserializePathEntries(storedPath).getOrElse(
+            fail(s"Expected a valid serialized path, got: $storedPath"))
+          assert(parsed.head == Seq("spark_catalog", "path_materialized_view"))
+          assert(!storedPath.toLowerCase(Locale.ROOT).contains("current_schema"))
+        } finally {
+          sql("SET PATH = DEFAULT_PATH")
+          sql(s"DROP VIEW IF EXISTS path_materialized_view.$testViewName")
+          sql(s"DROP VIEW IF EXISTS $testViewName")
+          sql("USE default")
+          sql("DROP DATABASE IF EXISTS path_materialized_view")
+        }
+      }
+    }
+  }
+
+  test("Current schema marker is materialized in persisted function path") {
+    withUserDefinedFunction(testFunctionName -> false) {
+      withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+        sql("CREATE DATABASE IF NOT EXISTS path_materialized_fn")
+        try {
+          sql("USE path_materialized_fn")
+          sql("SET PATH = current_schema, system.builtin")
+          sql(
+            s"""
+               |CREATE OR REPLACE FUNCTION $testFunctionName()
+               |RETURN SELECT 1
+               |""".stripMargin)
+          val function = analyzedSqlFunction(testFunctionName)
+          val storedPath = function.functionStoredResolutionPath.getOrElse(
+            fail("Expected persisted function resolution path to be set"))
+          val parsed = CatalogManager.deserializePathEntries(storedPath).getOrElse(
+            fail(s"Expected a valid serialized path, got: $storedPath"))
+          assert(parsed.head == Seq("spark_catalog", "path_materialized_fn"))
+          assert(!storedPath.toLowerCase(Locale.ROOT).contains("current_schema"))
+        } finally {
+          sql("SET PATH = DEFAULT_PATH")
+          sql(s"DROP FUNCTION IF EXISTS path_materialized_fn.$testFunctionName")
+          sql(s"DROP FUNCTION IF EXISTS $testFunctionName")
+          sql("USE default")
+          sql("DROP DATABASE IF EXISTS path_materialized_fn")
+        }
+      }
+    }
+  }
+
+  test("Session-only path is omitted from persisted view metadata and remains queryable") {
+    withTable("default.path_empty_src") {
+      withView(testViewName) {
+        withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+          sql("CREATE TABLE default.path_empty_src (id INT) USING parquet")
+          sql("INSERT INTO default.path_empty_src VALUES (1)")
+          try {
+            // Persisted objects strip system.session; a session-only path should not be persisted.
+            sql("SET PATH = system.session")
+            sql(
+              s"CREATE VIEW $testViewName AS SELECT id FROM spark_catalog.default.path_empty_src")
+            val metadata =
+              spark.sessionState.catalog.getTableMetadata(TableIdentifier(testViewName))
+            assert(metadata.viewStoredResolutionPath.isEmpty)
+            checkAnswer(sql(s"SELECT id FROM default.$testViewName"), Row(1))
+          } finally {
+            sql("SET PATH = DEFAULT_PATH")
+          }
+        }
+      }
+    }
+  }
+
+  test("Malformed persisted SQL function path fails analysis context setup") {
+    withUserDefinedFunction(testFunctionName -> false) {
+      withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+        sql(
+          s"""
+             |CREATE OR REPLACE FUNCTION $testFunctionName()
+             |RETURN SELECT 1
+             |""".stripMargin)
+        val function = analyzedSqlFunction(testFunctionName)
+        val broken = function.copy(
+          properties = function.properties + (SQLFunction.FUNCTION_RESOLUTION_PATH -> "{bad-json"))
+        val e = intercept[AnalysisException] {
+          AnalysisContext.withAnalysisContext(broken) {
+            ()
+          }
+        }
+        assert(e.getMessage.contains("Invalid stored SQL path metadata for SQL function"))
+      }
+    }
+  }
+
   private def testView(confName: String, expectedValue: String): Unit = {
     sql(s"CREATE VIEW $testViewName AS SELECT CAST('string' AS BIGINT) AS alias")
 
@@ -185,21 +289,19 @@ class AlwaysPersistedConfigsSuite extends SharedSparkSession {
          |RETURN SELECT CAST('string' AS BIGINT) AS alias
          |""".stripMargin)
 
-    val df = sql(s"select $testFunctionName()")
+    assert(analyzedSqlFunction(testFunctionName).properties.get(confName).get == expectedValue)
+  }
 
-    assert(
-      df.queryExecution.analyzed
-        .asInstanceOf[Project]
-        .projectList
-        .head
-        .asInstanceOf[Alias]
-        .child
-        .asInstanceOf[SQLScalarFunction]
-        .function
-        .asInstanceOf[SQLFunction]
-        .properties
-        .get(confName)
-        .get == expectedValue
-    )
+  private def analyzedSqlFunction(functionName: String): SQLFunction = {
+    val df = sql(s"select $functionName()")
+    df.queryExecution.analyzed
+      .asInstanceOf[Project]
+      .projectList
+      .head
+      .asInstanceOf[Alias]
+      .child
+      .asInstanceOf[SQLScalarFunction]
+      .function
+      .asInstanceOf[SQLFunction]
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 7562d5669cc2c..3833b7f2509d1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -261,6 +261,73 @@ abstract class CTEInlineSuiteBase
     }
   }
 
+  test("SPARK-56921: plan normalization handles nested CTEs under union") {
+    withTempView("input", "common") {
+      Seq((1, 1, 10), (1, 2, 20), (2, 1, 30))
+        .toDF("a", "b", "value")
+        .createOrReplaceTempView("input")
+
+      sql(
+        s"""with cte_common as (
+           |  select a, b, sum(value) as value
+           |  from input
+           |  group by a, b
+           |)
+           |select * from cte_common
+         """.stripMargin).createOrReplaceTempView("common")
+
+      val left = sql(
+        s"""with cte_a as (
+           |  select a, sum(value) as value
+           |  from common
+           |  group by a
+           |)
+           |select a as id, value from cte_a
+         """.stripMargin)
+
+      val right = sql(
+        s"""with cte_b as (
+           |  select b, sum(value) as value
+           |  from common
+           |  group by b
+           |)
+           |select b as id, value from cte_b
+         """.stripMargin)
+
+      checkAnswer(
+        left.union(right),
+        Row(1, 30) :: Row(2, 30) :: Row(1, 40) :: Row(2, 20) :: Nil)
+    }
+  }
+
+  test("SPARK-56921: plan normalization preserves recursive CTE loop refs") {
+    val df = sql(
+      s"""with recursive t(n) as (
+         |  select 1
+         |  union all
+         |  select n + 1 from t where n < 3
+         |)
+         |select * from t
+       """.stripMargin)
+
+    val normalized = df.queryExecution.normalized
+    val unionLoops = normalized.collect { case unionLoop: UnionLoop => unionLoop }
+
+    assert(unionLoops.nonEmpty, "Recursive CTE should normalize with a UnionLoop.")
+    unionLoops.foreach { unionLoop =>
+      val unionLoopRefs = unionLoop.recursion.collect {
+        case unionLoopRef: UnionLoopRef => unionLoopRef
+      }
+
+      assert(unionLoopRefs.nonEmpty, "Recursive CTE should normalize with a UnionLoopRef.")
+      assert(
+        unionLoopRefs.forall(_.loopId == unionLoop.id),
+        "UnionLoopRef loop IDs should match the normalized UnionLoop ID.")
+    }
+
+    checkAnswer(df, Row(1) :: Row(2) :: Row(3) :: Nil)
+  }
+
   test("SPARK-36447: invalid nested CTEs") {
     withTempView("t") {
       Seq((0, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 3c99c975977a2..36244071206b8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -656,6 +656,51 @@ class DataFrameAggregateSuite extends SharedSparkSession
       df.selectExpr("sort_array(collect_set(b) RESPECT NULLS)"), Seq(Row(Seq(null, 2))))
   }
 
+  test("SPARK-57298: collect_set normalizes NaN and -0.0 for floating-point types") {
+    checkAnswer(
+      sql("SELECT collect_set(v) FROM VALUES (double('NaN')), (double('NaN')) AS t(v)"),
+      Row(Seq(Double.NaN)))
+    checkAnswer(
+      sql("SELECT collect_set(v) FROM VALUES (float('NaN')), (float('NaN')) AS t(v)"),
+      Row(Seq(Float.NaN)))
+
+    checkAnswer(
+      sql("SELECT collect_set(v) FROM VALUES (-0.0D), (0.0D) AS t(v)"),
+      Row(Seq(0.0d)))
+    checkAnswer(
+      sql("SELECT collect_set(v) FROM VALUES (float(-0.0)), (float(0.0)) AS t(v)"),
+      Row(Seq(0.0f)))
+
+    val df = Seq(Double.NaN, Double.NaN, 0.0d, -0.0d, 1.0d).toDF("v").repartition(3)
+    checkAnswer(df.selectExpr("sort_array(collect_set(v))"), Row(Seq(0.0d, 1.0d, Double.NaN)))
+  }
+
+  test("SPARK-57298: collect_set normalizes NaN and -0.0 nested in complex types") {
+    checkAnswer(
+      sql("SELECT collect_set(named_struct('a', v)) FROM VALUES (-0.0D), (0.0D) AS t(v)"),
+      Row(Seq(Row(0.0d))))
+    checkAnswer(
+      sql("SELECT collect_set(a) FROM VALUES (array(-0.0D)), (array(0.0D)) AS t(a)"),
+      Row(Seq(Seq(0.0d))))
+    checkAnswer(
+      sql("SELECT collect_set(a) FROM VALUES (array(float(-0.0))), (array(float(0.0))) AS t(a)"),
+      Row(Seq(Seq(0.0f))))
+
+    // Nested NaN already deduplicates today, included as a guardrail against regressions.
+    checkAnswer(
+      sql("SELECT collect_set(named_struct('a', v)) FROM " +
+        "VALUES (double('NaN')), (double('NaN')) AS t(v)"),
+      Row(Seq(Row(Double.NaN))))
+    checkAnswer(
+      sql("SELECT collect_set(a) FROM " +
+        "VALUES (array(double('NaN'))), (array(double('NaN'))) AS t(a)"),
+      Row(Seq(Seq(Double.NaN))))
+    checkAnswer(
+      sql("SELECT collect_set(a) FROM " +
+        "VALUES (array(float('NaN'))), (array(float('NaN'))) AS t(a)"),
+      Row(Seq(Seq(Float.NaN))))
+  }
+
   test("collect functions structs") {
     val df = Seq((1, 2, 2), (2, 2, 2), (3, 4, 1))
       .toDF("a", "x", "y")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index a0d9d2e9f40d3..b92b9c08c4581 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.Cast._
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
 import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC}
+import org.apache.spark.sql.execution.WholeStageCodegenExec
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
@@ -350,6 +351,13 @@ class DataFrameFunctionsSuite extends SharedSparkSession {
            "expression" -> "\"id\"",
            "expressionAnyValue" -> "\"any_value(id)\"")
         )
+
+        val nestedDf = Seq("error_multiple_providers", "openai")
+          .toDF("provider")
+          .select(struct(col("provider")).as("c"))
+        checkAnswer(
+          nestedDf.select(nullif(col("c.provider"), lower(lit("ERROR_MULTIPLE_PROVIDERS")))),
+          Seq(Row(null), Row("openai")))
       }
     }
   }
@@ -1986,6 +1994,40 @@ class DataFrameFunctionsSuite extends SharedSparkSession {
     )
   }
 
+  test("array_join with nullable nullReplacement under whole-stage codegen") {
+    // With a nullable nullReplacement column and an upstream IsNotNull
+    // filter that tightens the array (and delimiter) to non-nullable, whole-stage codegen used to
+    // build the joined string but leave ev.isNull = true, discarding every row as NULL. The result
+    // must match interpreted eval(). The source is materialized via a cached temp view (an
+    // InMemoryRelation), so the plan is not folded to interpreted eval by ConvertToLocalRelation.
+    withTempView("array_join_codegen") {
+      Seq(
+        (Seq[String]("a", null, "b"), ",", "NR"),
+        (Seq[String]("a", null, "b"), ",", null),
+        (Seq[String]("x", "y"), "-", "NR")
+      ).toDF("arr", "delim_col", "repl_col").createOrReplaceTempView("array_join_codegen")
+      spark.catalog.cacheTable("array_join_codegen")
+
+      val query =
+        "SELECT array_join(arr, delim_col, repl_col) FROM array_join_codegen " +
+          "WHERE arr IS NOT NULL AND delim_col IS NOT NULL"
+
+      withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY") {
+        val df = sql(query)
+        assert(
+          df.queryExecution.executedPlan.exists(_.isInstanceOf[WholeStageCodegenExec]),
+          "expected the array_join query to run inside whole-stage codegen")
+        checkAnswer(df, Seq(Row("a,NR,b"), Row(null), Row("x-y")))
+      }
+
+      withSQLConf(
+          SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
+          SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN") {
+        checkAnswer(sql(query), Seq(Row("a,NR,b"), Row(null), Row("x-y")))
+      }
+    }
+  }
+
   test("array_min function") {
     val df = Seq(
       Seq[Option[Int]](Some(1), Some(3), Some(2)),
@@ -6341,7 +6383,159 @@ class DataFrameFunctionsSuite extends SharedSparkSession {
           call_function("spark_catalog.default.custom_sum", $"a")),
         Row(12.0, 12.0, 12.0))
     }
+  }
+
+  private def isPositiveZero(d: Double): Boolean =
+    java.lang.Double.doubleToRawLongBits(d) == 0L
+
+  test("SPARK-54918: array set ops normalize -0.0 and NaN via VALUES inline table") {
+    val r = sql("""
+      SELECT
+        array_distinct(a) AS d,
+        array_union(a, b) AS u,
+        array_intersect(a, b) AS i,
+        array_except(a, b) AS e,
+        arrays_overlap(a, b) AS o
+      FROM VALUES (array(-0.0d, 0.0d, double('NaN')), array(0.0d, double('NaN')))
+      AS t(a, b)
+    """).head()
+
+    val distinct = r.getSeq[Double](0)
+    assert(distinct.length == 2)
+    assert(distinct.exists(isPositiveZero))
+    assert(distinct.exists(_.isNaN))
+
+    val union = r.getSeq[Double](1)
+    assert(union.length == 2)
+    assert(union.exists(isPositiveZero))
+    assert(union.exists(_.isNaN))
+
+    val intersect = r.getSeq[Double](2)
+    assert(intersect.length == 2)
+    assert(intersect.exists(isPositiveZero))
+    assert(intersect.exists(_.isNaN))
+
+    val except = r.getSeq[Double](3)
+    assert(except.isEmpty)
+
+    assert(r.getBoolean(4))
+  }
+
+  test("SPARK-54918: array_distinct normalizes -0.0 to +0.0 - literals") {
+    val r1 = Seq(1).toDF()
+      .select(array_distinct(typedLit(Array(-0.0d, 0.0d)))).head().getSeq[Double](0)
+
+    assert(r1.length == 1)
+    assert(isPositiveZero(r1.head))
+
+    val r2 = Seq(1).toDF()
+      .select(array_distinct(
+        typedLit(Array(Double.NaN, 0.0d, -0.0d, Double.NaN)))
+      ).head().getSeq[Double](0)
+
+    assert(r2.length == 2)
+    assert(r2.exists(_.isNaN))
+    assert(r2.exists(isPositiveZero))
+  }
+
+  test("SPARK-54918: array_distinct normalizes -0.0 to +0.0") {
+    val r1 = Seq(Array(-0.0d, 0.0d)).toDF("a")
+      .select(array_distinct($"a")).head().getSeq[Double](0)
+
+    assert(r1.length == 1)
+    assert(isPositiveZero(r1.head))
+
+    val r2 = Seq(Array(Double.NaN, 0.0d, -0.0d, Double.NaN)).toDF("a")
+      .select(array_distinct($"a")).head().getSeq[Double](0)
+
+    assert(r2.length == 2)
+    assert(r2.exists(_.isNaN))
+    assert(r2.exists(isPositiveZero))
+  }
+
+  test("SPARK-54918: array_union normalizes -0.0 to +0.0 - literals") {
+    val r = Seq(1).toDF()
+      .select(array_union(
+        typedLit(Array(-0.0d)),
+        typedLit(Array(0.0d)))
+      ).head().getSeq[Double](0)
+
+    assert(r.length == 1)
+    assert(isPositiveZero(r.head))
+  }
+
+  test("SPARK-54918: array_union normalizes -0.0 to +0.0") {
+    val r = Seq((Array(-0.0d), Array(0.0d))).toDF("a", "b")
+      .select(array_union($"a", $"b")).head().getSeq[Double](0)
+
+    assert(r.length == 1)
+    assert(isPositiveZero(r.head))
+  }
+
+  test("SPARK-54918: array_intersect normalizes -0.0 to +0.0 - literals") {
+    val r = Seq(1).toDF()
+      .select(array_intersect(
+        typedLit(Array(-0.0d)),
+        typedLit(Array(0.0d)))
+      ).head().getSeq[Double](0)
+
+    assert(r.length == 1)
+    assert(isPositiveZero(r.head))
+  }
+
+  test("SPARK-54918: array_intersect normalizes -0.0 to +0.0") {
+    val r = Seq((Array(-0.0d), Array(0.0d))).toDF("a", "b")
+      .select(array_intersect($"a", $"b")).head().getSeq[Double](0)
+
+    assert(r.length == 1)
+    assert(isPositiveZero(r.head))
+  }
+
+  test("SPARK-54918: array_except normalizes -0.0 to +0.0 - literals") {
+    val r1 = Seq(1).toDF()
+      .select(array_except(
+        typedLit(Array(-0.0d)),
+        typedLit(Array(0.0d)))
+      ).head().getSeq[Double](0)
+
+    assert(r1.isEmpty)
+
+    val r2 = Seq(1).toDF()
+      .select(array_except(
+        typedLit(Array(0.0d)),
+        typedLit(Array(-0.0d)))
+      ).head().getSeq[Double](0)
+
+    assert(r2.isEmpty)
+  }
+
+  test("SPARK-54918: array_except normalizes -0.0 to +0.0") {
+    val r1 = Seq((Array(-0.0d), Array(0.0d))).toDF("a", "b")
+      .select(array_except($"a", $"b")).head().getSeq[Double](0)
+
+    assert(r1.isEmpty)
+
+    val r2 = Seq((Array(0.0d), Array(-0.0d))).toDF("a", "b")
+      .select(array_except($"a", $"b")).head().getSeq[Double](0)
+
+    assert(r2.isEmpty)
+  }
+
+  test("SPARK-54918: arrays_overlap normalizes -0.0 to +0.0 - literals") {
+    val r = Seq(1).toDF()
+      .select(arrays_overlap(
+        typedLit(Array(-0.0d)),
+        typedLit(Array(0.0d)))
+      ).head().getBoolean(0)
+
+    assert(r)
+  }
+
+  test("SPARK-54918: arrays_overlap normalizes -0.0 to +0.0") {
+    val r = Seq((Array(-0.0d), Array(0.0d))).toDF("a", "b")
+      .select(arrays_overlap($"a", $"b")).head().getBoolean(0)
 
+    assert(r)
   }
 }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNearestByJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNearestByJoinSuite.scala
new file mode 100644
index 0000000000000..271c5eb4552cc
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNearestByJoinSuite.scala
@@ -0,0 +1,441 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.plans.{NearestByDirection, NearestByJoinMode, NearestByJoinType}
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.SlowSQLTest
+
+@SlowSQLTest
+class DataFrameNearestByJoinSuite extends QueryTest with SharedSparkSession {
+
+  private def prepareForNearestByJoin(): (classic.DataFrame, classic.DataFrame) = {
+    val users = spark.createDataFrame(
+      Seq((1, 10.0), (2, 20.0), (3, 30.0))).toDF("user_id", "score")
+    val products = spark.createDataFrame(
+      Seq(("A", 11.0), ("B", 22.0), ("C", 5.0))).toDF("product", "pscore")
+    (users, products)
+  }
+
+  test("similarity, inner, k=1") {
+    val (users, products) = prepareForNearestByJoin()
+    val result = users.nearestByJoin(
+      products,
+      -abs(users("score") - products("pscore")),
+      numResults = 1,
+      mode = "exact",
+      direction = "similarity")
+
+    checkAnswer(
+      result.select("user_id", "product").orderBy("user_id"),
+      Seq(Row(1, "A"), Row(2, "B"), Row(3, "B"))
+    )
+  }
+
+  test("distance, inner, k=2") {
+    val (users, products) = prepareForNearestByJoin()
+    val result = users.nearestByJoin(
+      products,
+      abs(users("score") - products("pscore")),
+      numResults = 2,
+      mode = "exact",
+      direction = "distance")
+
+    // For each user_id, closest 2 by |score - pscore|:
+    //   user 1 (10): A (|10-11|=1), C (|10-5|=5)
+    //   user 2 (20): B (|20-22|=2), A (|20-11|=9)
+    //   user 3 (30): B (|30-22|=8), A (|30-11|=19)
+    checkAnswer(
+      result.select("user_id", "product").orderBy("user_id", "product"),
+      Seq(
+        Row(1, "A"), Row(1, "C"),
+        Row(2, "A"), Row(2, "B"),
+        Row(3, "A"), Row(3, "B"))
+    )
+  }
+
+  test("left outer when right side is empty") {
+    val (users, products) = prepareForNearestByJoin()
+    val emptyProducts = products.filter(lit(false))
+    val result = users.nearestByJoin(
+      emptyProducts,
+      -abs(users("score") - emptyProducts("pscore")),
+      numResults = 1,
+      joinType = "leftouter",
+      mode = "approx",
+      direction = "similarity")
+
+    checkAnswer(
+      result.select("user_id", "product").orderBy("user_id"),
+      Seq(Row(1, null), Row(2, null), Row(3, null))
+    )
+  }
+
+  test("inner drops left rows with no matches") {
+    val (users, products) = prepareForNearestByJoin()
+    val emptyProducts = products.filter(lit(false))
+    val result = users.nearestByJoin(
+      emptyProducts,
+      -abs(users("score") - emptyProducts("pscore")),
+      numResults = 1,
+      mode = "exact",
+      direction = "similarity")
+
+    assert(result.count() === 0)
+  }
+
+  test("self-join: each row finds nearest other rows in the same DataFrame") {
+    val (users, _) = prepareForNearestByJoin()
+    // We pass `users` as both sides; DeduplicateRelations rewrites the right side to
+    // generate fresh ExprIds, so the join resolves. Both `users("score")` references in
+    // the ranking expression bind to the original (left) attribute, so the rank is
+    // identically 0 for every candidate -- this test exercises self-join resolution,
+    // not nearest-row selection.
+    val result = users.nearestByJoin(
+      users,
+      -abs(users("score") - users("score")),
+      numResults = 2,
+      mode = "exact",
+      direction = "similarity")
+
+    // 3 users x 2 nearest = 6 rows; output schema has user_id and score from both sides.
+    assert(result.count() === 6)
+    assert(result.columns.length === 4)
+  }
+
+  test("inner: NULL ranking values for all candidates drops the left row") {
+    // Construct a left side where every comparison yields NULL: a NULL score on the left makes
+    // `abs(left.score - right.pscore)` evaluate to NULL for every right row, so MaxMinByK skips
+    // every candidate (its `ord == null` early-return path) and the heap stays empty. With INNER,
+    // the left row is dropped entirely.
+    val users = spark.createDataFrame(
+      Seq[(Int, java.lang.Double)]((1, null), (2, 20.0d))).toDF("user_id", "score")
+    val products = spark.createDataFrame(
+      Seq(("A", 11.0), ("B", 22.0))).toDF("product", "pscore")
+
+    val result = users.nearestByJoin(
+      products,
+      abs(users("score") - products("pscore")),
+      numResults = 1,
+      mode = "exact",
+      direction = "distance")
+
+    // Only user 2 should appear; user 1 (NULL score) drops because no candidate has a
+    // non-null ranking value.
+    checkAnswer(
+      result.select("user_id", "product"),
+      Seq(Row(2, "B"))
+    )
+  }
+
+  test("left outer: NULL ranking values for all candidates preserves left with NULLs") {
+    // Same shape as the previous test, but LEFT OUTER preserves user 1 with NULL right-side
+    // columns instead of dropping it.
+    val users = spark.createDataFrame(
+      Seq[(Int, java.lang.Double)]((1, null), (2, 20.0d))).toDF("user_id", "score")
+    val products = spark.createDataFrame(
+      Seq(("A", 11.0), ("B", 22.0))).toDF("product", "pscore")
+
+    val result = users.nearestByJoin(
+      products,
+      abs(users("score") - products("pscore")),
+      numResults = 1,
+      joinType = "leftouter",
+      mode = "exact",
+      direction = "distance")
+
+    checkAnswer(
+      result.select("user_id", "product").orderBy("user_id"),
+      Seq(Row(1, null), Row(2, "B"))
+    )
+  }
+
+  test("numResults larger than right side returns min(k, available) per left row") {
+    // Right side has 3 rows; ask for 5. Each left row should get exactly 3 matches, not 5
+    // padded with NULLs.
+    val (users, products) = prepareForNearestByJoin()
+    val result = users.nearestByJoin(
+      products,
+      abs(users("score") - products("pscore")),
+      numResults = 5,
+      mode = "exact",
+      direction = "distance")
+
+    // 3 users x min(5, 3) = 9 rows.
+    assert(result.count() === 9)
+    // No NULL padding: every left row pairs with every product exactly once.
+    val perUser = result.groupBy("user_id").count().collect().map(r => r.getInt(0) -> r.getLong(1))
+    assert(perUser.toMap === Map(1 -> 3L, 2 -> 3L, 3 -> 3L))
+  }
+
+  test("duplicate left rows each get an independent top-K") {
+    // Two identical user rows must not be collapsed into a single group: each must independently
+    // produce its own top-K. This proves the per-row __qid tagging in the rewrite works.
+    val users = spark.createDataFrame(
+      Seq((1, 10.0), (1, 10.0))).toDF("user_id", "score")
+    val products = spark.createDataFrame(
+      Seq(("A", 11.0), ("B", 22.0), ("C", 5.0))).toDF("product", "pscore")
+
+    val result = users.nearestByJoin(
+      products,
+      abs(users("score") - products("pscore")),
+      numResults = 1,
+      mode = "exact",
+      direction = "distance")
+
+    // Two identical left rows -> two output rows, both pairing with product A (closest to 10.0).
+    checkAnswer(
+      result.select("user_id", "product"),
+      Seq(Row(1, "A"), Row(1, "A"))
+    )
+  }
+
+  test("conflicting column names between sides resolve via DataFrame qualifiers") {
+    // Both sides have a column named `score`; the ranking expression disambiguates via
+    // DataFrame-qualified accessors.
+    val left = spark.createDataFrame(Seq((1, 10.0), (2, 20.0))).toDF("id", "score")
+    val right = spark.createDataFrame(
+      Seq(("A", 11.0), ("B", 22.0), ("C", 5.0))).toDF("name", "score")
+
+    val result = left.nearestByJoin(
+      right,
+      -abs(left("score") - right("score")),
+      numResults = 1,
+      mode = "exact",
+      direction = "similarity")
+
+    checkAnswer(
+      result.select("id", "name").orderBy("id"),
+      Seq(Row(1, "A"), Row(2, "B"))
+    )
+    // Output schema should carry both `score` columns through (4 columns total).
+    assert(result.columns.length === 4)
+  }
+
+  test("streaming inputs are rejected at analysis time") {
+    // Build a streaming left side and a static right side; NearestByJoin must be rejected
+    // at analysis before the optimizer rewrite (an unconditioned cross-product fed into a
+    // global Aggregate keyed by a per-row identifier) ever runs.
+    import testImplicits._
+    implicit val ctx = spark.sqlContext
+    val streamingUsers = MemoryStream[(Int, Double)].toDF().toDF("user_id", "score")
+    val products = spark.createDataFrame(
+      Seq(("A", 11.0), ("B", 22.0), ("C", 5.0))).toDF("product", "pscore")
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        streamingUsers.nearestByJoin(
+          products,
+          -abs(streamingUsers("score") - products("pscore")),
+          numResults = 1,
+          mode = "exact",
+          direction = "similarity").queryExecution.analyzed
+      },
+      condition = "NEAREST_BY_JOIN.STREAMING_NOT_SUPPORTED",
+      parameters = Map.empty)
+  }
+
+  test("rejected when spark.sql.crossJoin.enabled is false") {
+    // The rewrite produces an unconditioned cross-product internally, so when the user has
+    // opted out of cross-products via `spark.sql.crossJoin.enabled = false`, NEAREST BY
+    // queries are rejected at analysis time with `NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED` --
+    // a NEAREST BY-specific error class added so the user does not see internal rewrite
+    // attributes in the error message.
+    withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
+      val (users, products) = prepareForNearestByJoin()
+      checkError(
+        exception = intercept[AnalysisException] {
+          users.nearestByJoin(
+            products,
+            -abs(users("score") - products("pscore")),
+            numResults = 1,
+            mode = "exact",
+            direction = "similarity").queryExecution.analyzed
+        },
+        condition = "NEAREST_BY_JOIN.CROSS_JOIN_NOT_ENABLED",
+        parameters = Map.empty)
+    }
+  }
+
+  test("exact + left outer: empty right side preserves all left rows with NULLs") {
+    // Exercises the EXACT + LEFT OUTER combination, which no other test covers together.
+    val (users, products) = prepareForNearestByJoin()
+    val emptyProducts = products.filter(lit(false))
+    val result = users.nearestByJoin(
+      emptyProducts,
+      -abs(users("score") - emptyProducts("pscore")),
+      numResults = 1,
+      joinType = "leftouter",
+      mode = "exact",
+      direction = "similarity")
+
+    checkAnswer(
+      result.select("user_id", "product").orderBy("user_id"),
+      Seq(Row(1, null), Row(2, null), Row(3, null))
+    )
+  }
+
+  test("SQL: APPROX NEAREST SIMILARITY") {
+    val (users, products) = prepareForNearestByJoin()
+    users.createOrReplaceTempView("t_users")
+    products.createOrReplaceTempView("t_products")
+    try {
+      val result = spark.sql(
+        """
+          |SELECT u.user_id, p.product
+          |FROM t_users u JOIN t_products p
+          |  APPROX NEAREST 1 BY SIMILARITY -abs(u.score - p.pscore)
+          |""".stripMargin)
+      checkAnswer(
+        result.orderBy("user_id"),
+        Seq(Row(1, "A"), Row(2, "B"), Row(3, "B"))
+      )
+    } finally {
+      spark.catalog.dropTempView("t_users")
+      spark.catalog.dropTempView("t_products")
+    }
+  }
+
+  test("SQL: EXACT NEAREST DISTANCE") {
+    val (users, products) = prepareForNearestByJoin()
+    users.createOrReplaceTempView("t_users")
+    products.createOrReplaceTempView("t_products")
+    try {
+      val result = spark.sql(
+        """
+          |SELECT u.user_id, p.product
+          |FROM t_users u JOIN t_products p
+          |  EXACT NEAREST 1 BY DISTANCE abs(u.score - p.pscore)
+          |""".stripMargin)
+      checkAnswer(
+        result.orderBy("user_id"),
+        Seq(Row(1, "A"), Row(2, "B"), Row(3, "B"))
+      )
+    } finally {
+      spark.catalog.dropTempView("t_users")
+      spark.catalog.dropTempView("t_products")
+    }
+  }
+
+  test("invalid numResults is rejected") {
+    val (users, products) = prepareForNearestByJoin()
+    Seq(0, 100001).foreach { k =>
+      checkError(
+        exception = intercept[AnalysisException] {
+          users.nearestByJoin(
+            products,
+            -abs(users("score") - products("pscore")),
+            numResults = k,
+            mode = "exact",
+            direction = "similarity")
+        },
+        condition = "NEAREST_BY_JOIN.NUM_RESULTS_OUT_OF_RANGE",
+        parameters = Map(
+          "numResults" -> k.toString,
+          "min" -> "1",
+          "max" -> "100000"))
+    }
+  }
+
+  test("invalid joinType is rejected") {
+    val (users, products) = prepareForNearestByJoin()
+    checkError(
+      exception = intercept[AnalysisException] {
+        users.nearestByJoin(
+          products,
+          -abs(users("score") - products("pscore")),
+          numResults = 1,
+          joinType = "rightouter",
+          mode = "approx",
+          direction = "similarity")
+      },
+      condition = "NEAREST_BY_JOIN.UNSUPPORTED_JOIN_TYPE",
+      parameters = Map(
+        "joinType" -> "rightouter",
+        "supported" -> NearestByJoinType.supportedDisplay))
+  }
+
+  test("invalid mode is rejected") {
+    val (users, products) = prepareForNearestByJoin()
+    checkError(
+      exception = intercept[AnalysisException] {
+        users.nearestByJoin(
+          products,
+          -abs(users("score") - products("pscore")),
+          numResults = 1,
+          joinType = "inner",
+          mode = "bogus",
+          direction = "similarity")
+      },
+      condition = "NEAREST_BY_JOIN.UNSUPPORTED_MODE",
+      parameters = Map(
+        "mode" -> "bogus",
+        "supported" -> NearestByJoinMode.supported.mkString("'", "', '", "'")))
+  }
+
+  test("invalid direction is rejected") {
+    val (users, products) = prepareForNearestByJoin()
+    checkError(
+      exception = intercept[AnalysisException] {
+        users.nearestByJoin(
+          products,
+          -abs(users("score") - products("pscore")),
+          numResults = 1,
+          mode = "exact",
+          direction = "bogus")
+      },
+      condition = "NEAREST_BY_JOIN.UNSUPPORTED_DIRECTION",
+      parameters = Map(
+        "direction" -> "bogus",
+        "supported" -> NearestByDirection.supported.mkString("'", "', '", "'")))
+  }
+
+  test("non-orderable ranking expression is rejected") {
+    val (users, products) = prepareForNearestByJoin()
+    checkError(
+      exception = intercept[AnalysisException] {
+        users.nearestByJoin(
+          products,
+          map(users("score"), products("pscore")),
+          numResults = 1,
+          mode = "exact",
+          direction = "similarity")
+      },
+      condition = "NEAREST_BY_JOIN.NON_ORDERABLE_RANKING_EXPRESSION",
+      parameters = Map(
+        "expression" -> "\"map(score, pscore)\"",
+        "type" -> "\"MAP<DOUBLE, DOUBLE>\""))
+  }
+
+  test("EXACT mode accepts nondeterministic ranking expression") {
+    val (users, products) = prepareForNearestByJoin()
+    val result = users.nearestByJoin(
+      products,
+      rand() + products("pscore"),
+      numResults = 1,
+      joinType = "inner",
+      mode = "exact",
+      direction = "similarity")
+
+    // Result rows are nondeterministic; only assert that each left row gets exactly one match.
+    assert(result.count() === 3)
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
index e65942689bc06..d838ba4c234f9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
@@ -1642,6 +1642,43 @@ class DataFrameSetOperationsSuite extends SharedSparkSession with AdaptiveSparkP
       }
     }
   }
+
+  test("SPARK-51262: exceptAll after dropDuplicates with subset should not throw") {
+    // Data where dropDuplicates(subset) produces deterministic results - to avoid test flakiness.
+    val df1 = spark.createDataFrame(Seq(
+      (1, "a", 100),
+      (2, "b", 200),
+      (3, "c", 300)
+    )).toDF("id", "name", "value")
+
+    val df2 = spark.createDataFrame(Seq(
+      (1, "a", 100)
+    )).toDF("id", "name", "value")
+
+    // dropDuplicates with subset - each (id, name) is already unique so output is deterministic
+    val deduped = df1.dropDuplicates("id", "name")
+
+    // exceptAll should work without INTERNAL_ERROR_ATTRIBUTE_NOT_FOUND
+    val result = deduped.exceptAll(df2)
+    assert(result.columns === Array("id", "name", "value"))
+    val rows = result.collect().sortBy(_.getInt(0))
+    assert(rows.length === 2)
+    assert(rows(0) === Row(2, "b", 200))
+    assert(rows(1) === Row(3, "c", 300))
+
+    // Also verify except (non-all) works and returns correct values
+    val result2 = deduped.except(df2)
+    val rows2 = result2.collect().sortBy(_.getInt(0))
+    assert(rows2.length === 2)
+    assert(rows2(0) === Row(2, "b", 200))
+    assert(rows2(1) === Row(3, "c", 300))
+
+    // intersectAll should also work and return the matching row
+    val result3 = deduped.intersectAll(df2)
+    val rows3 = result3.collect()
+    assert(rows3.length === 1)
+    assert(rows3.head === Row(1, "a", 100))
+  }
 }
 
 case class UnionClass1a(a: Int, b: Long, nested: UnionClass2)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala
index abeb335fdf2ca..abe7a385ea93d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala
@@ -901,4 +901,94 @@ class DataFrameWriterV2Suite extends SharedSparkSession with BeforeAndAfter {
     insertDF.cache()
     checkAnswer(spark.table("testcat.table_name"), Seq(Row(1, "a"), Row(2, "b")))
   }
+
+  test("withSchemaEvolution: append evolves the table schema to add a new column") {
+    spark.sql("CREATE TABLE testcat.table_name (id bigint) USING foo")
+    val df = Seq((1L, "a")).toDF("id", "data")
+
+    // Without withSchemaEvolution the extra column is rejected.
+    checkError(
+      exception = intercept[AnalysisException](df.writeTo("testcat.table_name").append()),
+      condition = "INSERT_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS",
+      parameters = Map(
+        "tableName" -> "`testcat`.`table_name`",
+        "tableColumns" -> "`id`",
+        "dataColumns" -> "`id`, `data`"))
+
+    df.writeTo("testcat.table_name").withSchemaEvolution().append()
+
+    assert(spark.table("testcat.table_name").schema ===
+      new StructType().add("id", LongType).add("data", StringType))
+    checkAnswer(spark.table("testcat.table_name"), Seq(Row(1L, "a")))
+  }
+
+  test("withSchemaEvolution: overwrite evolves the table schema to add a new column") {
+    spark.sql("CREATE TABLE testcat.table_name (id bigint) USING foo")
+    val df = Seq((1L, "a")).toDF("id", "data")
+
+    df.writeTo("testcat.table_name").withSchemaEvolution().overwrite(lit(true))
+
+    assert(spark.table("testcat.table_name").schema ===
+      new StructType().add("id", LongType).add("data", StringType))
+    checkAnswer(spark.table("testcat.table_name"), Seq(Row(1L, "a")))
+  }
+
+  test("withSchemaEvolution: overwritePartitions evolves the table schema to add a new column") {
+    spark.sql("CREATE TABLE testcat.table_name (id bigint) USING foo PARTITIONED BY (id)")
+    val df = Seq((1L, "a")).toDF("id", "data")
+
+    df.writeTo("testcat.table_name").withSchemaEvolution().overwritePartitions()
+
+    assert(spark.table("testcat.table_name").schema ===
+      new StructType().add("id", LongType).add("data", StringType))
+    checkAnswer(spark.table("testcat.table_name"), Seq(Row(1L, "a")))
+  }
+
+  test("withSchemaEvolution: fails if the table does not support automatic schema evolution") {
+    spark.sql(
+      """CREATE TABLE testcat.table_name (id bigint) USING foo
+        |TBLPROPERTIES ('auto-schema-evolution' = 'false')""".stripMargin)
+    val df = Seq((1L, "a")).toDF("id", "data")
+
+    // With auto-schema-evolution disabled the extra column still fails.
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.writeTo("testcat.table_name").withSchemaEvolution().append()
+      },
+      condition = "INSERT_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS",
+      parameters = Map(
+        "tableName" -> "`testcat`.`table_name`",
+        "tableColumns" -> "`id`",
+        "dataColumns" -> "`id`, `data`"))
+  }
+
+  test("withSchemaEvolution: create fails with CREATE_TABLE sub-error") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.table("source").writeTo("testcat.new_table").using("foo")
+          .withSchemaEvolution().create()
+      },
+      condition = "UNSUPPORTED_SCHEMA_EVOLUTION.CREATE_TABLE",
+      parameters = Map.empty)
+  }
+
+  test("withSchemaEvolution: replace/createOrReplace fail with REPLACE_TABLE sub-error") {
+    spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo")
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.table("source").writeTo("testcat.table_name").using("foo")
+          .withSchemaEvolution().replace()
+      },
+      condition = "UNSUPPORTED_SCHEMA_EVOLUTION.REPLACE_TABLE",
+      parameters = Map.empty)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.table("source").writeTo("testcat.table_name").using("foo")
+          .withSchemaEvolution().createOrReplace()
+      },
+      condition = "UNSUPPORTED_SCHEMA_EVOLUTION.REPLACE_TABLE",
+      parameters = Map.empty)
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 04be1e8fcfba3..af52204dbb7ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -251,6 +251,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
     checkKeywordsExistsInExplain(df,
       "Project [id#xL AS ifnull(id, 1)#xL, if ((id#xL = 1)) null " +
         "else id#xL AS nullif(id, 1)#xL, id#xL AS nvl(id, 1)#xL, 1 AS nvl2(id, 1, 2)#x]")
+    checkKeywordsNotExistsInExplain(df, ExtendedMode, "typednullliteral")
   }
 
   test("SPARK-26659: explain of DataWritingCommandExec should not contain duplicate cmd.nodeName") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index 8aa6f5a5d0e6e..1fc45e9703f9e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -878,6 +878,29 @@ class FileBasedDataSourceSuite extends SharedSparkSession
     assert(fileList.toSet === expectedFileList.toSet)
   }
 
+  test("recursiveFileLookup with a partitioned catalog table is rejected") {
+    withTable("part_tbl") {
+      sql(
+        """
+          |CREATE TABLE part_tbl (id INT, value STRING)
+          |USING parquet
+          |PARTITIONED BY (year INT)
+          |""".stripMargin)
+      sql("INSERT INTO part_tbl PARTITION (year = 2024) VALUES (1, 'a')")
+      sql("INSERT INTO part_tbl PARTITION (year = 2025) VALUES (2, 'b')")
+      checkError(
+        exception = intercept[AnalysisException] {
+          spark.read
+            .option("recursiveFileLookup", "true")
+            .table("part_tbl")
+            .collect()
+        },
+        condition = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE",
+        parameters = Map.empty[String, String]
+      )
+    }
+  }
+
   test("Return correct results when data columns overlap with partition columns") {
     Seq("parquet", "orc", "json").foreach { format =>
       withTempPath { path =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index 015ea9defae94..1944b5679c848 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.trees.LeafLike
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{IntegerType, StructType}
+import org.apache.spark.sql.types.{ArrayType, IntegerType, StructType}
 
 class GeneratorFunctionSuite extends SharedSparkSession {
   import testImplicits._
@@ -765,6 +765,59 @@ class GeneratorFunctionSuite extends SharedSparkSession {
       Seq(Row(0, 10, 0, 10), Row(1, 20, 1, 20))
     )
   }
+
+  test("SPARK-48091: explode with transform should preserve struct field aliases") {
+    val df = spark.createDataFrame(Seq((1, Array(1, 2, 3), Array(4, 5, 6))))
+      .toDF("id", "my_array", "my_array2")
+
+    // Without explode - aliases should work (baseline)
+    val good = df.select(
+      transform(col("my_array2"), x => struct(x.as("data"))).as("my_struct")
+    )
+    assert(good.schema("my_struct").dataType.asInstanceOf[ArrayType]
+      .elementType.asInstanceOf[StructType].fieldNames.toSeq === Seq("data"))
+
+    // With explode in same select - aliases should still be preserved
+    val result = df.select(
+      explode(col("my_array")).as("exploded"),
+      transform(col("my_array2"), x => struct(x.as("data"))).as("my_struct")
+    )
+    assert(result.schema("my_struct").dataType.asInstanceOf[ArrayType]
+      .elementType.asInstanceOf[StructType].fieldNames.toSeq === Seq("data"))
+
+    // Multiple aliases inside struct
+    val result2 = df.select(
+      explode(col("my_array")).as("exploded"),
+      transform(col("my_array2"),
+        x => struct(x.as("value"), col("id").as("key"))
+      ).as("my_struct")
+    )
+    val fields2 = result2.schema("my_struct").dataType.asInstanceOf[ArrayType]
+      .elementType.asInstanceOf[StructType].fieldNames.toSeq
+    assert(fields2 === Seq("value", "key"))
+  }
+
+  test("SPARK-56426: LATERAL VIEW column alias with dot in name should resolve correctly") {
+    // Single-alias: explode with a dotted alias
+    checkAnswer(
+      sql(
+        """
+          |SELECT id, `skill.inst`
+          |FROM VALUES (1, array('a', 'b')) AS t(id, skills)
+          |LATERAL VIEW explode(skills) skills_table AS `skill.inst`
+        """.stripMargin),
+      Row(1, "a") :: Row(1, "b") :: Nil)
+
+    // Multi-alias: inline with multiple dotted aliases
+    checkAnswer(
+      sql(
+        """
+          |SELECT `a.b`, `c.d`
+          |FROM (SELECT 1) t
+          |LATERAL VIEW inline(array(named_struct('f1', 10, 'f2', 'hello'))) gen AS `a.b`, `c.d`
+        """.stripMargin),
+      Row(10, "hello") :: Nil)
+  }
 }
 
 case class EmptyGenerator() extends Generator with LeafLike[Expression] {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeographyDataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeographyDataFrameSuite.scala
index 77e4e48679370..ee633590bd252 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeographyDataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeographyDataFrameSuite.scala
@@ -189,6 +189,24 @@ class GeographyDataFrameSuite extends SharedSparkSession {
     checkAnswer(df, Seq(Row(expectedGeog)))
   }
 
+  test("SPARK-57058: ORDER BY on a Geography column fails with INVALID_ORDERING_TYPE") {
+    val rdd = sparkContext.parallelize(Seq(
+      Row(Geography.fromWKB(point1, 4326)),
+      Row(Geography.fromWKB(point2, 4326))))
+    val schema = StructType(Seq(StructField("g", GeographyType(4326), nullable = false)))
+    spark.createDataFrame(rdd, schema).createOrReplaceTempView("geog_t")
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.sql("SELECT g FROM geog_t ORDER BY g").collect()
+      },
+      condition = "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE",
+      parameters = Map(
+        "functionName" -> "`sortorder`",
+        "dataType" -> "\"GEOGRAPHY(4326)\"",
+        "sqlExpr" -> "\"g ASC NULLS FIRST\""),
+      queryContext = Array(ExpectedContext("g", 30, 30)))
+  }
+
   test("geospatial feature disabled") {
     withSQLConf(SQLConf.GEOSPATIAL_ENABLED.key -> "false") {
       val geography = Geography.fromWKB(point1, 4326)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeometryDataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeometryDataFrameSuite.scala
index e460c04d6220c..62cf25ddcdbaf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeometryDataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeometryDataFrameSuite.scala
@@ -193,6 +193,24 @@ class GeometryDataFrameSuite extends SharedSparkSession {
     checkAnswer(df, Seq(Row(expectedGeom)))
   }
 
+  test("SPARK-57058: ORDER BY on a Geometry column fails with INVALID_ORDERING_TYPE") {
+    val rdd = sparkContext.parallelize(Seq(
+      Row(Geometry.fromWKB(point1, 0)),
+      Row(Geometry.fromWKB(point2, 0))))
+    val schema = StructType(Seq(StructField("g", GeometryType(0), nullable = false)))
+    spark.createDataFrame(rdd, schema).createOrReplaceTempView("geo_t")
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.sql("SELECT g FROM geo_t ORDER BY g").collect()
+      },
+      condition = "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE",
+      parameters = Map(
+        "functionName" -> "`sortorder`",
+        "dataType" -> "\"GEOMETRY(0)\"",
+        "sqlExpr" -> "\"g ASC NULLS FIRST\""),
+      queryContext = Array(ExpectedContext("g", 29, 29)))
+  }
+
   test("geospatial feature disabled") {
     withSQLConf(SQLConf.GEOSPATIAL_ENABLED.key -> "false") {
       val geometry = Geometry.fromWKB(point1, 4326)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index 42aa2b46fb727..3ea77446f268d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -1288,6 +1288,17 @@ class JoinSuite extends SharedSparkSession with AdaptiveSparkPlanHelper
     }
   }
 
+  test("SPARK-36082: only use SingleColumn Null Aware Anti Join when right side " +
+      "can broadcast") {
+    withSQLConf(SQLConf.OPTIMIZE_NULL_AWARE_ANTI_JOIN.key -> "true",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0") {
+      val joinExec = assertJoin((
+        "select * from testData where key not in (select b from testData3)",
+        classOf[BroadcastNestedLoopJoinExec]))
+      assert(!joinExec.isInstanceOf[BroadcastHashJoinExec])
+    }
+  }
+
   test("SPARK-32399: Full outer shuffled hash join") {
     val inputDFs = Seq(
       // Test unique join key
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
index d6b22431e854e..ca7732772b588 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
@@ -20,10 +20,12 @@ package org.apache.spark.sql
 import java.time.{Instant, LocalDate, LocalDateTime, ZoneId}
 
 import org.apache.spark.sql.catalyst.ExtendedAnalysisException
+import org.apache.spark.sql.catalyst.analysis.{BindParameters, CTESubstitution, ExpressionWithUnresolvedIdentifier, NameParameterizedQuery, PlanWithUnresolvedIdentifier}
 import org.apache.spark.sql.catalyst.expressions.Literal
 import org.apache.spark.sql.catalyst.parser.ParseException
-import org.apache.spark.sql.catalyst.plans.logical.Limit
+import org.apache.spark.sql.catalyst.plans.logical.{CacheTableAsSelect, CTEInChildren, Limit, OverwriteByExpression, ReplaceTableAsSelect, WithCTE}
 import org.apache.spark.sql.catalyst.trees.SQLQueryContext
+import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.functions.{array, call_function, lit, map, map_from_arrays, map_from_entries, str_to_map, struct}
 import org.apache.spark.sql.internal.SQLConf
@@ -2460,4 +2462,248 @@ class ParametersSuite extends SharedSparkSession {
       spark.sql("SELECT 1", Array.empty[Any]),
       Row(1))
   }
+
+  // SPARK-46625: WITH ... <write-with-IDENTIFIER> SELECT ... FROM cte
+  // The placeholder is pushed into the command's identifier slot at parse time, so
+  // `CTESubstitution` sees the `CTEInChildren` directly and never produces the invalid
+  // `WithCTE(InsertIntoStatement, ...)` / `WithCTE(CreateTableAsSelect, ...)` shape.
+  private def assertNoWithCTEAroundCTEInChildren(df: DataFrame): Unit = {
+    df.queryExecution.analyzed.foreach {
+      case WithCTE(_: CTEInChildren, _) =>
+        fail(s"Found invalid WithCTE(CTEInChildren, _) shape:\n${df.queryExecution.analyzed}")
+      case _ =>
+    }
+  }
+
+  test("SPARK-46625: WITH ... INSERT OVERWRITE TABLE IDENTIFIER(:p) SELECT ... FROM cte") {
+    withTable("t_cte_overwrite") {
+      sql("CREATE TABLE t_cte_overwrite (a INT) USING PARQUET")
+      sql("INSERT INTO t_cte_overwrite VALUES (10)")
+      val df = spark.sql(
+        """WITH transformation AS (SELECT 1 AS a)
+          |INSERT OVERWRITE TABLE IDENTIFIER(:tname)
+          |SELECT * FROM transformation""".stripMargin,
+        Map("tname" -> "t_cte_overwrite"))
+      assertNoWithCTEAroundCTEInChildren(df)
+      checkAnswer(spark.table("t_cte_overwrite"), Row(1))
+    }
+  }
+
+  test("SPARK-46625: WITH ... INSERT INTO IDENTIFIER(:p) SELECT ... FROM cte") {
+    withTable("t_cte_into") {
+      sql("CREATE TABLE t_cte_into (a INT) USING PARQUET")
+      val df = spark.sql(
+        """WITH transformation AS (SELECT 7 AS a)
+          |INSERT INTO IDENTIFIER(:tname)
+          |SELECT * FROM transformation""".stripMargin,
+        Map("tname" -> "t_cte_into"))
+      assertNoWithCTEAroundCTEInChildren(df)
+      checkAnswer(spark.table("t_cte_into"), Row(7))
+    }
+  }
+
+  test("SPARK-46625: CREATE TABLE IDENTIFIER(:p) AS WITH ... SELECT ... FROM cte") {
+    withTable("t_cte_ctas") {
+      val df = spark.sql(
+        """CREATE TABLE IDENTIFIER(:tname) USING PARQUET AS
+          |WITH transformation AS (SELECT 3 AS a)
+          |SELECT * FROM transformation""".stripMargin,
+        Map("tname" -> "t_cte_ctas"))
+      assertNoWithCTEAroundCTEInChildren(df)
+      checkAnswer(spark.table("t_cte_ctas"), Row(3))
+    }
+  }
+
+  // SPARK-46625: legacy parameter-substitution mode triggers the parameters.scala traversal
+  // path. The placeholder lives in `InsertIntoStatement.table`, which is *not* a child, so this
+  // exercises the `InsertIntoStatement` special-case in `BindParameters.bind` that recurses into
+  // the `table` slot, and the `getDefaultTreePatternBits` override on `InsertIntoStatement` that
+  // exposes `table`'s tree-pattern bits for pruning.
+  test("SPARK-46625: INSERT IDENTIFIER(:p) under legacy parameter substitution") {
+    withSQLConf(SQLConf.LEGACY_PARAMETER_SUBSTITUTION_CONSTANTS_ONLY.key -> "true") {
+      withTable("t_legacy_param") {
+        sql("CREATE TABLE t_legacy_param (a INT) USING PARQUET")
+        spark.sql(
+          """WITH transformation AS (SELECT 11 AS a)
+            |INSERT INTO IDENTIFIER(:tname)
+            |SELECT * FROM transformation""".stripMargin,
+          Map("tname" -> "t_legacy_param"))
+        checkAnswer(spark.table("t_legacy_param"), Row(11))
+      }
+    }
+  }
+
+  // SPARK-46625: INSERT INTO REPLACE WHERE goes through `OverwriteByExpression`, whose `table`
+  // slot is typed `NamedRelation`. `PlanWithUnresolvedIdentifier` extends `NamedRelation` so the
+  // placeholder sits in the slot directly. Verify on the parsed plan that the placeholder lives
+  // in `OverwriteByExpression.table` rather than wrapping the whole command -- running the
+  // analyzer fully would require a v2 catalog.
+  test("SPARK-46625: WITH ... INSERT INTO IDENTIFIER(:p) REPLACE WHERE ... parser") {
+    // Use a non-literal-string expression so `withIdentClause` produces
+    // `PlanWithUnresolvedIdentifier` rather than short-circuiting to `UnresolvedRelation`.
+    val parsedPlan = spark.sessionState.sqlParser.parsePlan(
+      """WITH transformation AS (SELECT 99 AS a)
+        |INSERT INTO IDENTIFIER('some' || '_table') REPLACE WHERE a = 10
+        |SELECT * FROM transformation""".stripMargin)
+    val overwrite = parsedPlan.collectFirst { case o: OverwriteByExpression => o }.getOrElse(
+      fail(s"Expected OverwriteByExpression in parsed plan:\n$parsedPlan"))
+    assert(overwrite.table.isInstanceOf[PlanWithUnresolvedIdentifier],
+      s"Expected OverwriteByExpression.table to be PlanWithUnresolvedIdentifier, " +
+        s"got ${overwrite.table.getClass.getSimpleName}:\n$parsedPlan")
+    // After CTESubstitution runs, the CTE defs should land on the command's children (because
+    // OverwriteByExpression is a CTEInChildren) -- never as `WithCTE(OverwriteByExpression, _)`.
+    val substituted = CTESubstitution.apply(parsedPlan)
+    substituted.foreach {
+      case WithCTE(_: CTEInChildren, _) =>
+        fail(s"Found invalid WithCTE(CTEInChildren, _) shape after CTESubstitution:\n$substituted")
+      case _ =>
+    }
+  }
+
+  // SPARK-46625: Parameter inside `IDENTIFIER(:p)` on REPLACE WHERE lives in
+  // `OverwriteByExpression.table`, which is a non-child slot. Verify that
+  // `BindParameters.bind` reaches into the slot via the explicit `OverwriteByExpression`
+  // recursion (parameters.scala) and that the `getDefaultTreePatternBits` override on
+  // `OverwriteByExpression` exposes the PARAMETER bit for pruning. Done at the rule level
+  // because driving REPLACE WHERE through full analysis would require a v2 catalog.
+  test("SPARK-46625: BindParameters recurses into OverwriteByExpression.table") {
+    val parsedPlan = spark.sessionState.sqlParser.parsePlan(
+      """INSERT INTO IDENTIFIER(:tname) REPLACE WHERE a = 10
+        |SELECT 1 AS a""".stripMargin)
+    val overwrite = parsedPlan.collectFirst { case o: OverwriteByExpression => o }.getOrElse(
+      fail(s"Expected OverwriteByExpression in parsed plan:\n$parsedPlan"))
+    // Pruning prerequisite: the PARAMETER bit must be visible at the OverwriteByExpression
+    // level (it lives inside `table`, which is not a child); this exercises the
+    // `getDefaultTreePatternBits` override.
+    assert(overwrite.containsPattern(PARAMETER),
+      "OverwriteByExpression.getDefaultTreePatternBits must propagate `table`'s PARAMETER bit")
+
+    val bound = BindParameters.apply(
+      NameParameterizedQuery(parsedPlan, Seq("tname"), Seq(Literal("foo_table"))))
+    val boundOverwrite = bound.collectFirst { case o: OverwriteByExpression => o }.getOrElse(
+      fail(s"Expected OverwriteByExpression in bound plan:\n$bound"))
+    assert(!boundOverwrite.table.containsPattern(PARAMETER),
+      s"Expected :tname inside OverwriteByExpression.table to be bound, got:\n$boundOverwrite")
+  }
+
+  // SPARK-46625 followup: `INSERT INTO IDENTIFIER(<sql-variable>) ...` places a
+  // `PlanWithUnresolvedIdentifier` in `InsertIntoStatement.table`, whose `identifierExpr`
+  // holds an `UnresolvedAttribute` for the variable name. That slot is a non-child
+  // `LogicalPlan`, so the default `ResolveReferences` traversal never resolves the
+  // attribute, `ResolveIdentifierClause` cannot fire (it waits on `identifierExpr.resolved`),
+  // and analysis fails. Verify that the explicit `InsertIntoStatement` case added to
+  // `ResolveReferences` rewrites the attribute to a `VariableReference` and the insert
+  // completes end-to-end.
+  test("SPARK-46625: INSERT INTO IDENTIFIER(<sql-variable>) resolves variable in table slot") {
+    withTable("t_var_insert") {
+      sql("CREATE TABLE t_var_insert (a INT) USING PARQUET")
+      sql("DECLARE OR REPLACE VARIABLE target_table STRING")
+      try {
+        sql("SET VAR target_table = 't_var_insert'")
+        sql("INSERT INTO IDENTIFIER(target_table) SELECT 42 AS a")
+        checkAnswer(spark.table("t_var_insert"), Row(42))
+      } finally {
+        sql("DROP TEMPORARY VARIABLE IF EXISTS target_table")
+      }
+    }
+  }
+
+  // SPARK-46625 followup: when the SQL variable name in `IDENTIFIER(<name>)` collides
+  // with a query output column, the IDENTIFIER expression must still bind to the
+  // variable, not to the column. The `ResolveReferences` case for `InsertIntoStatement`
+  // resolves `identifierExpr` against the `PlanWithUnresolvedIdentifier` itself (whose
+  // `children` are `Nil` on this path), not against the surrounding `InsertIntoStatement`
+  // (whose child is `query`), so query output columns are out of scope and only the
+  // last-resort variable resolution path fires.
+  test("SPARK-46625: INSERT INTO IDENTIFIER(<sql-variable>) ignores colliding query columns") {
+    withTable("t_shadow") {
+      sql("CREATE TABLE t_shadow (a INT) USING PARQUET")
+      sql("DECLARE OR REPLACE VARIABLE a STRING DEFAULT 't_shadow'")
+      try {
+        sql("INSERT INTO IDENTIFIER(a) SELECT 42 AS a")
+        checkAnswer(spark.table("t_shadow"), Row(42))
+      } finally {
+        sql("DROP TEMPORARY VARIABLE IF EXISTS a")
+      }
+    }
+  }
+
+  // SPARK-46625: `CacheTableAsSelect.tempViewName` is an `Expression` slot, so an
+  // `IDENTIFIER(<non-literal>)` produces an `ExpressionWithUnresolvedIdentifier` there instead of
+  // wrapping the entire command in a `PlanWithUnresolvedIdentifier`. Verify on the parsed plan
+  // that the name slot holds the expression placeholder and no `WithCTE(CTEInChildren, _)` shape
+  // survives `CTESubstitution` (running the cache through full analysis would require the temp
+  // view machinery, so this is a parser-level test).
+  test("SPARK-46625: CACHE TABLE IDENTIFIER(...) AS WITH ... SELECT ... parser") {
+    val parsedPlan = spark.sessionState.sqlParser.parsePlan(
+      """CACHE TABLE IDENTIFIER('some' || '_view') AS
+        |WITH transformation AS (SELECT 4 AS a)
+        |SELECT * FROM transformation""".stripMargin)
+    val ctas = parsedPlan.collectFirst { case c: CacheTableAsSelect => c }.getOrElse(
+      fail(s"Expected CacheTableAsSelect in parsed plan:\n$parsedPlan"))
+    assert(ctas.tempViewName.isInstanceOf[ExpressionWithUnresolvedIdentifier],
+      s"Expected CacheTableAsSelect.tempViewName to be ExpressionWithUnresolvedIdentifier, " +
+        s"got ${ctas.tempViewName.getClass.getSimpleName}:\n$parsedPlan")
+    val substituted = CTESubstitution.apply(parsedPlan)
+    substituted.foreach {
+      case WithCTE(_: CTEInChildren, _) =>
+        fail(s"Found invalid WithCTE(CTEInChildren, _) shape after CTESubstitution:\n$substituted")
+      case _ =>
+    }
+  }
+
+  // SPARK-46625: Regression for the `if c.tempViewName.resolved` guard in CheckAnalysis. When
+  // the IDENTIFIER expression itself fails to resolve (e.g. references an unresolved column),
+  // the guard skips the invariant-validation case so the catch-all `LogicalPlan` case can
+  // produce `UNRESOLVED_COLUMN`. Without the guard, the invariant case would pre-empt this
+  // path and throw a `SparkException internal error` instead.
+  test("SPARK-46625: CACHE TABLE IDENTIFIER(<unresolved-col>) reports UNRESOLVED_COLUMN") {
+    val ex = intercept[AnalysisException] {
+      spark.sql("CACHE TABLE IDENTIFIER(unresolved_col) AS SELECT 1 AS a")
+    }
+    assert(ex.getCondition != null && ex.getCondition.startsWith("UNRESOLVED_COLUMN"),
+      s"Expected UNRESOLVED_COLUMN.*, got ${ex.getCondition}: ${ex.getMessage}")
+    assert(!ex.getMessage.contains("CacheTableAsSelect.tempViewName must be"),
+      s"Internal-error message leaked into user-facing error: ${ex.getMessage}")
+  }
+
+  // SPARK-46625: End-to-end CACHE TABLE IDENTIFIER(:p) AS WITH ... SELECT ... -- exercises the
+  // `tempViewNameString` extraction in `DataSourceV2Strategy` and the `CheckAnalysis` invariant
+  // case for `CacheTableAsSelect.tempViewName`. The parser-level test above already verifies
+  // the placement and CTE shape; this one drives the full analysis + execution path.
+  test("SPARK-46625: CACHE TABLE IDENTIFIER(:p) AS WITH ... SELECT ...") {
+    withTempView("t_cte_cache") {
+      val df = spark.sql(
+        """CACHE TABLE IDENTIFIER(:tname) AS
+          |WITH transformation AS (SELECT 21 AS a)
+          |SELECT * FROM transformation""".stripMargin,
+        Map("tname" -> "t_cte_cache"))
+      assertNoWithCTEAroundCTEInChildren(df)
+      checkAnswer(spark.table("t_cte_cache"), Row(21))
+    }
+  }
+
+  // SPARK-46625: RTAS mirrors CTAS -- the placeholder goes into `ReplaceTableAsSelect.name`
+  // at parse time. Verify on the parsed plan that the placeholder lives in that slot and that
+  // no `WithCTE(CTEInChildren, _)` shape survives `CTESubstitution`. Running RTAS through full
+  // analysis would require a v2 catalog, so this is a parser-level test.
+  test("SPARK-46625: REPLACE TABLE IDENTIFIER(...) AS WITH ... SELECT ... parser") {
+    // Use a non-literal-string expression so `withIdentClause` produces
+    // `PlanWithUnresolvedIdentifier` rather than short-circuiting to `UnresolvedIdentifier`.
+    val parsedPlan = spark.sessionState.sqlParser.parsePlan(
+      """REPLACE TABLE IDENTIFIER('some' || '_table') USING PARQUET AS
+        |WITH transformation AS (SELECT 5 AS a)
+        |SELECT * FROM transformation""".stripMargin)
+    val rtas = parsedPlan.collectFirst { case r: ReplaceTableAsSelect => r }.getOrElse(
+      fail(s"Expected ReplaceTableAsSelect in parsed plan:\n$parsedPlan"))
+    assert(rtas.name.isInstanceOf[PlanWithUnresolvedIdentifier],
+      s"Expected ReplaceTableAsSelect.name to be PlanWithUnresolvedIdentifier, " +
+        s"got ${rtas.name.getClass.getSimpleName}:\n$parsedPlan")
+    val substituted = CTESubstitution.apply(parsedPlan)
+    substituted.foreach {
+      case WithCTE(_: CTEInChildren, _) =>
+        fail(s"Found invalid WithCTE(CTEInChildren, _) shape after CTESubstitution:\n$substituted")
+      case _ =>
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanMergeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanMergeSuite.scala
index 1e31453b42f23..e1109f20e6040 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/PlanMergeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanMergeSuite.scala
@@ -395,4 +395,47 @@ class PlanMergeSuite extends SharedSparkSession
       }
     }
   }
+
+  test("SPARK-56677: Merge scalar subqueries with filter propagation through Join") {
+    // subquery1 has no filter; subquery2 filters on b > 1 (a column from the right side of the join
+    // that is not part of the join condition). Predicate pushdown can only push this filter to
+    // testData2, not to testData, so only the right child differs between the two subqueries.
+    Seq(false, true).foreach { enableAQE =>
+      Seq(true, false).foreach { filterPropagationThroughJoinEnabled =>
+        withSQLConf(
+          SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> enableAQE.toString,
+          SQLConf.MERGE_SUBPLANS_FILTER_PROPAGATION_THROUGH_JOIN_ENABLED.key ->
+            filterPropagationThroughJoinEnabled.toString,
+          // ObjectSerializerPruning produces different scan shapes depending on whether a Filter is
+          // present. Disabling the rule makes both scans identical so PlanMerger can merge them.
+          SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
+            "org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning") {
+          val df = sql(
+            """
+              |SELECT
+              |  (SELECT sum(key) FROM testData JOIN testData2 ON key = a),
+              |  (SELECT sum(key) FROM testData JOIN testData2 ON key = a WHERE b > 1)
+            """.stripMargin)
+
+          checkAnswer(df, Row(12, 6) :: Nil)
+
+          val plan = df.queryExecution.executedPlan
+          val subqueryIds = collectWithSubqueries(plan) { case s: SubqueryExec => s.id }
+          val reusedSubqueryIds = collectWithSubqueries(plan) {
+            case rs: ReusedSubqueryExec => rs.child.id
+          }
+
+          if (filterPropagationThroughJoinEnabled) {
+            assert(subqueryIds.size == 1, "Missing or unexpected SubqueryExec in the plan")
+            assert(reusedSubqueryIds.size == 1,
+              "Missing or unexpected ReusedSubqueryExec in the plan")
+          } else {
+            assert(subqueryIds.size == 2, "Missing or unexpected SubqueryExec in the plan")
+            assert(reusedSubqueryIds.size == 0,
+              "Missing or unexpected ReusedSubqueryExec in the plan")
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index 036ec943127d8..291aa7cab7256 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -300,6 +300,28 @@ trait QueryTestBase
     super.withSQLConf(pairs: _*)(f)
   }
 
+  /**
+   * Temporarily sets SparkContext configuration values for testing.
+   * This is for configs that must be set on the SparkContext (not
+   * SQLConf), such as testing flags.
+   */
+  protected def withSparkContextConf[T](
+      pairs: (String, String)*)(f: => T): T = {
+    val sc = spark.sparkContext
+    val oldValues = pairs.map { case (k, _) =>
+      k -> sc.conf.getOption(k)
+    }
+    try {
+      pairs.foreach { case (k, v) => sc.conf.set(k, v) }
+      f
+    } finally {
+      oldValues.foreach {
+        case (k, Some(v)) => sc.conf.set(k, v)
+        case (k, None) => sc.conf.remove(k)
+      }
+    }
+  }
+
   /**
    * Drops functions after calling `f`. A function is represented by (functionName, isTemporary).
    */
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
index 40ed0b301e1a4..c3b6bd676d585 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
@@ -18,7 +18,13 @@
 package org.apache.spark.sql
 
 import org.apache.spark.SparkRuntimeException
+import org.apache.spark.sql.catalyst.analysis.TableOutputResolver
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
+import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
+import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, ResolveDefaultColumns}
 import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StringType}
 
 class ResolveDefaultColumnsSuite extends SharedSparkSession {
   test("column without default value defined (null as default)") {
@@ -308,4 +314,114 @@ class ResolveDefaultColumnsSuite extends SharedSparkSession {
       checkAnswer(sql(s"SELECT * FROM $tableName"), Seq(Row(0, user)))
     }
   }
+
+  test("SPARK-57187: current_user() as default for CHAR column should not throw INTERNAL_ERROR") {
+    val tableName = "test_current_user_char"
+    val user = spark.sparkContext.sparkUser
+    withTable(tableName) {
+      sql(s"CREATE TABLE $tableName(i int, s CHAR(100) DEFAULT current_user()) USING parquet")
+      sql(s"INSERT INTO $tableName (i) VALUES (1)")
+      val result = sql(s"SELECT i, TRIM(s) FROM $tableName").collect()
+      assert(result.length == 1)
+      assert(result.head.getInt(0) == 1)
+      assert(result.head.getString(1) == user)
+    }
+  }
+
+  test("SPARK-57187: current_user() as default for VARCHAR column") {
+    val tableName = "test_current_user_varchar"
+    val user = spark.sparkContext.sparkUser
+    withTable(tableName) {
+      sql(s"CREATE TABLE $tableName(i int, s VARCHAR(100) DEFAULT current_user()) USING parquet")
+      sql(s"INSERT INTO $tableName (i) VALUES (1)")
+      checkAnswer(sql(s"SELECT * FROM $tableName"), Seq(Row(1, user)))
+    }
+  }
+
+  test("SPARK-57187: ALTER TABLE with current_user() default for CHAR column") {
+    val tableName = "test_current_user_char_alter"
+    val user = spark.sparkContext.sparkUser
+    withTable(tableName) {
+      sql(s"CREATE TABLE $tableName(id INT, created_by CHAR(100)) USING parquet")
+      sql(s"ALTER TABLE $tableName ALTER COLUMN created_by SET DEFAULT current_user()")
+      sql(s"INSERT INTO $tableName (id) VALUES (1)")
+      val result = sql(s"SELECT id, TRIM(created_by) FROM $tableName").collect()
+      assert(result.length == 1)
+      assert(result.head.getInt(0) == 1)
+      assert(result.head.getString(1) == user)
+    }
+  }
+
+  test("SPARK-57187: foldable default exceeding CHAR/VARCHAR length fails at DDL time") {
+    // Foldable expressions are still validated eagerly at DDL time (existing behavior)
+    Seq("CHAR", "VARCHAR").foreach { typeName =>
+      checkError(
+        exception = intercept[SparkRuntimeException](
+          sql(s"CREATE TABLE t(c $typeName(3) DEFAULT 'toolong') USING parquet")),
+        condition = "EXCEED_LIMIT_LENGTH",
+        parameters = Map("limit" -> "3"))
+    }
+  }
+
+  test("SPARK-57187: non-foldable default exceeding CHAR/VARCHAR length fails at INSERT time " +
+      "(implicit default)") {
+    // current_user() exceeds CHAR(1)/VARCHAR(1) -- DDL succeeds because the expression is
+    // non-foldable, but INSERT should fail at runtime with EXCEED_LIMIT_LENGTH.
+    Seq("CHAR", "VARCHAR").foreach { typeName =>
+      withTable("t") {
+        sql(s"CREATE TABLE t(i INT, s $typeName(1) DEFAULT current_user()) USING parquet")
+        checkError(
+          exception = intercept[SparkRuntimeException](
+            sql("INSERT INTO t (i) VALUES (1)")),
+          condition = "EXCEED_LIMIT_LENGTH",
+          parameters = Map("limit" -> "1"))
+      }
+    }
+  }
+
+  test("SPARK-57187: non-foldable default exceeding CHAR/VARCHAR length fails at INSERT time " +
+      "(explicit DEFAULT keyword)") {
+    // Using the explicit DEFAULT keyword in VALUES goes through the checkField path.
+    Seq("CHAR", "VARCHAR").foreach { typeName =>
+      withTable("t") {
+        sql(s"CREATE TABLE t(i INT, s $typeName(1) DEFAULT current_user()) USING parquet")
+        checkError(
+          exception = intercept[SparkRuntimeException](
+            sql("INSERT INTO t VALUES (1, DEFAULT)")),
+          condition = "EXCEED_LIMIT_LENGTH",
+          parameters = Map("limit" -> "1"))
+      }
+    }
+  }
+
+  test("SPARK-57187: by-position default fill applies the CHAR/VARCHAR length check") {
+    // The by-position fill path (resolveColumnsByPosition under RECURSE / V2 schema evolution)
+    // shares the same applyDefaultWithLengthCheck helper as the by-name path. This drives that
+    // path directly and asserts the trailing default column is wrapped with the write-side
+    // length check, so an oversized non-foldable default is caught at runtime there too.
+    // Expected schema: (i INT, s CHAR(100) DEFAULT current_user()). CHAR is stored as StringType
+    // plus the raw-type metadata, exactly as the catalog represents it. CHAR(100) is wide enough
+    // that the resolved default does not trip the eager DDL-time length check, so we observe the
+    // write-side runtime check that the by-position fill path now adds.
+    val charMeta = new MetadataBuilder()
+      .putString(CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY, "char(100)")
+      .putString(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY, "current_user()")
+      .build()
+    val expected = Seq(
+      AttributeReference("i", IntegerType)(),
+      AttributeReference("s", StringType, nullable = true, metadata = charMeta)())
+    // A by-position INSERT that supplies only the leading column, omitting the trailing CHAR one.
+    val query = LocalRelation(AttributeReference("i", IntegerType)())
+
+    val resolved = TableOutputResolver.resolveOutputColumns(
+      "t", expected, query, byName = false, spark.sessionState.conf,
+      TableOutputResolver.DefaultValueFillMode.RECURSE)
+
+    val hasLengthCheck = resolved.expressions.exists(_.exists {
+      case s: StaticInvoke => s.functionName == "charTypeWriteSideCheck"
+      case _ => false
+    })
+    assert(hasLengthCheck,
+      "by-position default fill must apply the CHAR/VARCHAR write-side length check")
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index 3cf26aa94a5d1..8028970193acd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -50,6 +50,7 @@ trait SQLQueryTestHelper extends SQLConfHelper with Logging {
   protected def replaceNotIncludedMsg(line: String): String = {
     line.replaceAll("#\\d+", "#x")
       .replaceAll("plan_id=\\d+", "plan_id=x")
+      .replaceAll("uuid\\(Some\\(-?\\d+\\)\\)", "uuid(Some(x))")
       .replaceAll(
         s"Location.*$clsName/",
         s"Location $notIncludedMsg/{warehouse_dir}/")
@@ -178,7 +179,8 @@ trait SQLQueryTestHelper extends SQLConfHelper with Logging {
         val msg = Option(e.getMessageParameters.get("traceback")).getOrElse("")
         (emptySchema, Seq(e.getClass.getName, msg))
       case e: SparkThrowable with Throwable if e.getCondition != null =>
-        (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+        (emptySchema, Seq(e.getClass.getName,
+          getMessage(e, format).replaceAll("uuid\\(Some\\(-?\\d+\\)\\)", "uuid(Some(x))")))
       case a: AnalysisException =>
         // Do not output the logical plan tree which contains expression IDs.
         // Also implement a crude way of masking expression IDs in the error message
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
index 3406a7109de7c..7deb30cf9e9ce 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
@@ -106,11 +106,11 @@ class STExpressionsSuite
     // Construct the input GEOGRAPHY expression.
     val geogExpr = ST_GeogFromWKB(wkbLiteral)
     assert(geogExpr.dataType.sameType(defaultGeographyType))
-    checkEvaluation(ST_AsBinary(geogExpr), wkb)
+    checkEvaluation(new ST_AsBinary(geogExpr), wkb)
     // Cast the GEOGRAPHY with fixed SRID to GEOGRAPHY with mixed SRID.
     val castExpr = Cast(geogExpr, mixedSridGeographyType)
     assert(castExpr.dataType.sameType(mixedSridGeographyType))
-    checkEvaluation(ST_AsBinary(castExpr), wkb)
+    checkEvaluation(new ST_AsBinary(castExpr), wkb)
 
     // Construct the input GEOGRAPHY SQL query, using WKB literal.
     val geogQueryLit: String = s"ST_GeogFromWKB(X'$wkbString')"
@@ -146,11 +146,11 @@ class STExpressionsSuite
     // Construct the input GEOMETRY expression.
     val geomExpr = new ST_GeomFromWKB(wkbLiteral)
     assert(geomExpr.dataType.sameType(defaultGeometryType))
-    checkEvaluation(ST_AsBinary(geomExpr), wkb)
+    checkEvaluation(new ST_AsBinary(geomExpr), wkb)
     // Cast the GEOMETRY with fixed SRID to GEOMETRY with mixed SRID.
     val castExpr = Cast(geomExpr, mixedSridGeometryType)
     assert(castExpr.dataType.sameType(mixedSridGeometryType))
-    checkEvaluation(ST_AsBinary(castExpr), wkb)
+    checkEvaluation(new ST_AsBinary(castExpr), wkb)
 
     // Construct the input GEOMETRY SQL query, using WKB literal.
     val geomQueryLit: String = s"ST_GeomFromWKB(X'$wkbString')"
@@ -475,16 +475,42 @@ class STExpressionsSuite
 
   test("ST_AsBinary") {
     // Test data: WKB representation of POINT(1 2).
-    val wkb = Hex.unhex("0101000000000000000000F03F0000000000000040".getBytes())
-    val wkbLiteral = Literal.create(wkb, BinaryType)
+    val wkbNdr = Hex.unhex("0101000000000000000000F03F0000000000000040".getBytes())
+    val wkbXdr = Hex.unhex("00000000013FF00000000000004000000000000000".getBytes())
+    val wkbLiteral = Literal.create(wkbNdr, BinaryType)
+    val endiannessNdr = Literal.create("NDR")
+    val endiannessXdr = Literal.create("XDR")
     // ST_GeogFromWKB and ST_AsBinary.
     val geographyExpression = ST_GeogFromWKB(wkbLiteral)
     assert(geographyExpression.dataType.sameType(defaultGeographyType))
-    checkEvaluation(ST_AsBinary(geographyExpression), wkb)
+    checkEvaluation(new ST_AsBinary(geographyExpression), wkbNdr)
+    checkEvaluation(ST_AsBinary(geographyExpression, endiannessNdr), wkbNdr)
+    checkEvaluation(ST_AsBinary(geographyExpression, Literal.create("nDr")), wkbNdr)
+    checkEvaluation(ST_AsBinary(geographyExpression, endiannessXdr), wkbXdr)
     // ST_GeomFromWKB and ST_AsBinary.
     val geometryExpression = new ST_GeomFromWKB(wkbLiteral)
     assert(geometryExpression.dataType.sameType(defaultGeometryType))
-    checkEvaluation(ST_AsBinary(geometryExpression), wkb)
+    checkEvaluation(new ST_AsBinary(geometryExpression), wkbNdr)
+    checkEvaluation(ST_AsBinary(geometryExpression, endiannessNdr), wkbNdr)
+    checkEvaluation(ST_AsBinary(geometryExpression, endiannessXdr), wkbXdr)
+    checkEvaluation(ST_AsBinary(geometryExpression, Literal.create("XdR")), wkbXdr)
+    // Test NULL handling.
+    checkEvaluation(new ST_AsBinary(Literal.create(null, defaultGeographyType)), null)
+    checkEvaluation(ST_AsBinary(Literal.create(null, defaultGeographyType), endiannessNdr), null)
+    checkEvaluation(new ST_AsBinary(Literal.create(null, defaultGeometryType)), null)
+    checkEvaluation(ST_AsBinary(Literal.create(null, defaultGeometryType), endiannessXdr), null)
+    checkEvaluation(ST_AsBinary(geographyExpression, Literal.create(null, StringType)), null)
+    checkEvaluation(ST_AsBinary(geometryExpression, Literal.create(null, StringType)), null)
+    // Test invalid endianness.
+    Seq(geographyExpression, geometryExpression).foreach { expr =>
+      checkError(
+        exception = intercept[SparkIllegalArgumentException] {
+          ST_AsBinary(expr, Literal.create("ABC")).eval()
+        },
+        condition = "ST_INVALID_ENDIANNESS_VALUE",
+        parameters = Map("endianness" -> "ABC")
+      )
+    }
   }
 
   test("ST_GeogFromWKB - expressions") {
@@ -494,7 +520,7 @@ class STExpressionsSuite
     // ST_GeogFromWKB with default SRID.
     val geographyExpression = ST_GeogFromWKB(wkbLiteral)
     assert(geographyExpression.dataType.sameType(defaultGeographyType))
-    checkEvaluation(ST_AsBinary(geographyExpression), wkb)
+    checkEvaluation(new ST_AsBinary(geographyExpression), wkb)
     checkEvaluation(ST_Srid(geographyExpression), defaultGeographySrid)
     // ST_GeogFromWKB with NULL input.
     val nullLiteral = Literal.create(null, BinaryType)
@@ -523,11 +549,11 @@ class STExpressionsSuite
     // ST_GeomFromWKB with default SRID.
     val geometryExpressionNoSrid = new ST_GeomFromWKB(wkbLiteral)
     assert(geometryExpressionNoSrid.dataType.sameType(defaultGeometryType))
-    checkEvaluation(ST_AsBinary(geometryExpressionNoSrid), wkb)
+    checkEvaluation(new ST_AsBinary(geometryExpressionNoSrid), wkb)
     // ST_GeomFromWKB with valid SRID.
     val geometryExpressionValidSrid = ST_GeomFromWKB(wkbLiteral, validSridLiteral)
     assert(geometryExpressionValidSrid.dataType.sameType(GeometryType(validSrid)))
-    checkEvaluation(ST_AsBinary(geometryExpressionValidSrid), wkb)
+    checkEvaluation(new ST_AsBinary(geometryExpressionValidSrid), wkb)
     // ST_GeomFromWKB with invalid SRID.
     val geometryExpressionInvalidSrid = ST_GeomFromWKB(wkbLiteral, invalidSridLiteral)
     checkError(
@@ -638,7 +664,7 @@ class STExpressionsSuite
     // ST_SetSrid on GEOGRAPHY expression.
     val geogLit = ST_SetSrid(geographyLiteral, sridLiteral)
     assert(geogLit.dataType.sameType(GeographyType(srid)))
-    checkEvaluation(ST_AsBinary(geogLit), wkb)
+    checkEvaluation(new ST_AsBinary(geogLit), wkb)
     val geogLitSrid = ST_Srid(geogLit)
     assert(geogLitSrid.dataType.sameType(IntegerType))
     checkEvaluation(geogLitSrid, srid)
@@ -662,7 +688,7 @@ class STExpressionsSuite
     // ST_SetSrid on GEOMETRY expression.
     val geomLit = ST_SetSrid(geometryLiteral, sridLiteral)
     assert(geomLit.dataType.sameType(GeometryType(srid)))
-    checkEvaluation(ST_AsBinary(geomLit), wkb)
+    checkEvaluation(new ST_AsBinary(geomLit), wkb)
     val geomLitSrid = ST_Srid(geomLit)
     assert(geomLitSrid.dataType.sameType(IntegerType))
     checkEvaluation(geomLitSrid, srid)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/STFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/STFunctionsSuite.scala
index 6cb8894ae5252..7cd0d84df8c23 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/STFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STFunctionsSuite.scala
@@ -30,18 +30,37 @@ class STFunctionsSuite extends SharedSparkSession {
 
   test("st_asbinary") {
     // Test data: Well-Known Binary (WKB) representations.
-    val df = Seq[(String)](
-      (
-        "0101000000000000000000f03f0000000000000040"
-      )).toDF("wkb")
+    val wkbNdr = "0101000000000000000000f03f0000000000000040"
+    val wkbXdr = "00000000013ff00000000000004000000000000000"
+    val df = Seq[(String, String, String, String)](
+        (wkbNdr, wkbXdr, "NDR", "XDR")
+      ).toDF("wkbNDR", "wkbXDR", "endNDR", "endXDR")
     // ST_GeogFromWKB/ST_GeomFromWKB and ST_AsBinary.
     checkAnswer(
       df.select(
-        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkb"))))).as("col0"),
-        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkb"))))).as("col1")),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbNDR"))))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbNDR")), "NDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbNDR")), $"endNDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbNDR")), "XDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbNDR")), $"endXDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbXDR"))))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbXDR")), "NDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbXDR")), $"endNDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbXDR")), "XDR"))),
+        lower(hex(st_asbinary(st_geogfromwkb(unhex($"wkbXDR")), $"endXDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbNDR"))))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbNDR")), "NDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbNDR")), $"endNDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbNDR")), "XDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbNDR")), $"endXDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbXDR"))))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbXDR")), "NDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbXDR")), $"endNDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbXDR")), "XDR"))),
+        lower(hex(st_asbinary(st_geomfromwkb(unhex($"wkbXDR")), $"endXDR")))),
       Row(
-        "0101000000000000000000f03f0000000000000040",
-        "0101000000000000000000f03f0000000000000040"))
+        wkbNdr, wkbNdr, wkbNdr, wkbXdr, wkbXdr, wkbNdr, wkbNdr, wkbNdr, wkbXdr, wkbXdr,
+        wkbNdr, wkbNdr, wkbNdr, wkbXdr, wkbXdr, wkbNdr, wkbNdr, wkbNdr, wkbXdr, wkbXdr))
   }
 
   test("st_geogfromwkb") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala
index eb3815f6209c8..b40e3799a9c04 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala
@@ -17,8 +17,12 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.SparkIllegalArgumentException
+import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException
+import org.apache.spark.sql.connector.catalog.InMemoryCatalog
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{IntegerType, LongType}
 
 /**
  * Tests for SET PATH command and session path management.
@@ -26,8 +30,8 @@ import org.apache.spark.sql.test.SharedSparkSession
  * DEFAULT_PATH, SYSTEM_PATH, CURRENT_SCHEMA/CURRENT_DATABASE expansion,
  * PATH (append), duplicate detection, and error conditions.
  *
- * Resolution-level tests (tables/functions resolving via the stored path)
- * belong in a separate suite once the resolution engine is wired.
+ * Resolution-level tests (tables/functions resolving via stored frozen path)
+ * are covered in SQLViewSuite and SQLFunctionSuite (SPARK-56639).
  */
 class SetPathSuite extends SharedSparkSession {
 
@@ -211,7 +215,7 @@ class SetPathSuite extends SharedSparkSession {
         },
         condition = "DUPLICATE_SQL_PATH_ENTRY",
         sqlState = Some("42732"),
-        parameters = Map("pathEntry" -> "spark_catalog.default"))
+        parameters = Map("pathEntry" -> "current_schema"))
     }
   }
 
@@ -231,16 +235,17 @@ class SetPathSuite extends SharedSparkSession {
     }
   }
 
-  test("PATH enabled: duplicate after expanding CURRENT_SCHEMA") {
+  test("PATH enabled: literal + CURRENT_SCHEMA collision is tolerated (USE-state dependent)") {
+    // SET PATH only rejects static duplicates (literal-vs-literal, current_schema repeated).
+    // A literal that happens to match the live current_schema is not flagged: a later
+    // `USE SCHEMA` may make them diverge, and at lookup the first match wins anyway.
+    // `system.builtin` is included so `current_path()` itself remains resolvable.
     withPathEnabled {
       sql("USE spark_catalog.default")
-      checkError(
-        exception = intercept[AnalysisException] {
-          sql("SET PATH = spark_catalog.default, current_schema")
-        },
-        condition = "DUPLICATE_SQL_PATH_ENTRY",
-        sqlState = Some("42732"),
-        parameters = Map("pathEntry" -> "spark_catalog.default"))
+      sql("SET PATH = spark_catalog.default, current_schema, system.builtin")
+      val entries = pathEntries(currentPath())
+      assert(entries === Seq("spark_catalog.default", "spark_catalog.default", "system.builtin"),
+        s"Expected literal + resolved CURRENT_SCHEMA preserved; got: $entries")
     }
   }
 
@@ -253,7 +258,7 @@ class SetPathSuite extends SharedSparkSession {
         },
         condition = "DUPLICATE_SQL_PATH_ENTRY",
         sqlState = Some("42732"),
-        parameters = Map("pathEntry" -> "spark_catalog.default"))
+        parameters = Map("pathEntry" -> "current_schema"))
     }
   }
 
@@ -269,14 +274,60 @@ class SetPathSuite extends SharedSparkSession {
     }
   }
 
-  test("PATH enabled: SET PATH = SYSTEM_PATH includes system.builtin and system.session") {
+  test("PATH enabled: SET PATH = SYSTEM_PATH expands to system-managed namespaces") {
+    // SPARK-57109: SYSTEM_PATH expands to the system-managed namespaces under the `system`
+    // catalog. Today that is just `system.builtin`; the shortcut is reserved for future
+    // system-managed schemas.
     withPathEnabled {
       sql("SET PATH = SYSTEM_PATH")
       val entries = pathEntries(currentPath())
-      assert(entries.contains("system.builtin"),
-        s"SYSTEM_PATH should include system.builtin; got: $entries")
-      assert(entries.contains("system.session"),
-        s"SYSTEM_PATH should include system.session; got: $entries")
+      assert(entries === Seq("system.builtin"),
+        s"SYSTEM_PATH should expand to exactly [system.builtin]; got: $entries")
+    }
+  }
+
+  test("PATH enabled: SET PATH = DEFAULT_PATH includes system.builtin, system.session, " +
+    "and the current schema") {
+    // SPARK-57109: pin the spark-built-in default ordering used when `spark.sql.defaultPath`
+    // is empty, so a future change to SYSTEM_PATH cannot silently drift the DEFAULT_PATH
+    // contract. The default `sessionFunctionResolutionOrder` is "second" (builtin first, then
+    // session, then catalog entries); ordering tests for the other modes live below.
+    withPathEnabled {
+      sql("USE spark_catalog.default")
+      sql("SET PATH = DEFAULT_PATH")
+      val entries = pathEntries(currentPath())
+      assert(entries === Seq("system.builtin", "system.session", "spark_catalog.default"),
+        s"DEFAULT_PATH should expand to system.builtin, system.session, and the current " +
+          s"schema; got: $entries")
+    }
+  }
+
+  test("PATH enabled: SET PATH = SYSTEM_PATH, CURRENT_SCHEMA composes cleanly") {
+    // SPARK-57109: SYSTEM_PATH plus CURRENT_SCHEMA is the canonical "system functions plus my
+    // working schema" path; verify the expansion is exactly those two entries in order.
+    withPathEnabled {
+      sql("USE spark_catalog.default")
+      sql("SET PATH = SYSTEM_PATH, CURRENT_SCHEMA")
+      val entries = pathEntries(currentPath())
+      assert(entries === Seq("system.builtin", "spark_catalog.default"),
+        s"SYSTEM_PATH, CURRENT_SCHEMA should expand to [system.builtin, " +
+          s"current schema]; got: $entries")
+    }
+  }
+
+  test("PATH enabled: SET PATH = SYSTEM_PATH, system.session is the documented migration form") {
+    // SPARK-57109: callers who relied on the old SYSTEM_PATH expansion (system.builtin +
+    // system.session) can name system.session explicitly. Because SYSTEM_PATH now expands to
+    // only system.builtin, listing system.session alongside it is legal and yields
+    // [system.builtin, system.session]. If SYSTEM_PATH ever re-expanded to carry system.session
+    // again, this entry would collide and raise DUPLICATE_SQL_PATH_ENTRY -- which is the
+    // regression this test guards against.
+    withPathEnabled {
+      sql("SET PATH = SYSTEM_PATH, system.session")
+      val entries = pathEntries(currentPath())
+      assert(entries === Seq("system.builtin", "system.session"),
+        s"SYSTEM_PATH, system.session should expand to [system.builtin, system.session]; " +
+          s"got: $entries")
     }
   }
 
@@ -320,18 +371,22 @@ class SetPathSuite extends SharedSparkSession {
 
   test("PATH enabled: multi-level namespace (3+ parts) is accepted") {
     withPathEnabled {
-      sql("SET PATH = iceberg_cat.db1.db2, spark_catalog.default")
-      val entries = pathEntries(currentPath())
-      assert(entries.head === "iceberg_cat.db1.db2",
-        s"Multi-level namespace should be accepted; got: $entries")
+      // SET PATH should accept multi-level namespaces without error.
+      // We verify the path is stored correctly via the CatalogManager API
+      // rather than currentPath(), which would fail because spark_catalog
+      // only supports single-part namespaces.
+      sql("SET PATH = spark_catalog.ns1.ns2, spark_catalog.default")
+      val stored = spark.sessionState.catalogManager.sessionPathEntries
+      assert(stored.isDefined, "Session path should be stored")
+      assert(stored.get.length == 2, s"Should have 2 entries, got: ${stored.get}")
     }
   }
 
   test("PATH enabled: backtick-quoted identifiers with dots round-trip correctly") {
     withPathEnabled {
-      sql("SET PATH = `cat.a`.`sch.b`")
+      sql("SET PATH = spark_catalog.`sch.b`, system.builtin")
       val entries = pathEntries(currentPath())
-      assert(entries === Seq("`cat.a`.`sch.b`"),
+      assert(entries.head === "spark_catalog.`sch.b`",
         s"Backtick-quoted identifiers should round-trip; got: $entries")
     }
   }
@@ -357,6 +412,124 @@ class SetPathSuite extends SharedSparkSession {
     }
   }
 
+  test("PATH enabled: case-sensitive mode does not treat differently cased entries as duplicates") {
+    withSQLConf(
+      SQLConf.PATH_ENABLED.key -> "true",
+      SQLConf.CASE_SENSITIVE.key -> "true") {
+      sql("SET PATH = spark_catalog.DEFAULT, spark_catalog.default")
+      val stored = spark.sessionState.catalogManager.sessionPathEntries.get
+      val rendered = stored.map(_.resolve("ignored", Nil).mkString("."))
+      assert(rendered === Seq("spark_catalog.DEFAULT", "spark_catalog.default"))
+    }
+  }
+
+  test("PATH enabled: unqualified SET VAR follows PATH; DDL on variables ignores PATH") {
+    withPathEnabled {
+      sql("DECLARE VARIABLE system.session.path_var_gate = 7")
+      try {
+        sql("SET PATH = spark_catalog.default")
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("SET VAR path_var_gate = 8")
+          },
+          condition = "UNRESOLVED_VARIABLE",
+          sqlState = "42883",
+          parameters = Map(
+            "variableName" -> "`path_var_gate`",
+            "searchPath" -> "[`spark_catalog`.`default`]"),
+          context = ExpectedContext("path_var_gate", 8, 20))
+
+        sql("SET VAR system.session.path_var_gate = 9")
+        checkAnswer(sql("SELECT system.session.path_var_gate"), Row(9))
+
+        sql("DROP TEMPORARY VARIABLE path_var_gate")
+
+        sql("DECLARE VARIABLE system.session.path_var_gate = 7")
+        sql("SET PATH = spark_catalog.default, system.session")
+        sql("SET VAR path_var_gate = 11")
+        checkAnswer(sql("SELECT path_var_gate"), Row(11))
+        sql("DROP TEMPORARY VARIABLE path_var_gate")
+      } finally {
+        sql("DROP TEMPORARY VARIABLE IF EXISTS system.session.path_var_gate")
+      }
+    }
+  }
+
+  test("PATH enabled: unqualified FETCH ... INTO follows PATH") {
+    withSQLConf(
+      SQLConf.PATH_ENABLED.key -> "true",
+      SQLConf.SQL_SCRIPTING_CURSOR_ENABLED.key -> "true") {
+      sql("DECLARE OR REPLACE VARIABLE path_fetch_target INT")
+      try {
+        // Sanity: FETCH INTO works under the default path (system.session is on it).
+        val ok = sql(
+          """
+            |BEGIN
+            |  DECLARE cur CURSOR FOR SELECT 42 AS val;
+            |  OPEN cur;
+            |  FETCH cur INTO path_fetch_target;
+            |  CLOSE cur;
+            |END;
+            |""".stripMargin)
+        checkAnswer(ok, Seq.empty[Row])
+        checkAnswer(sql("SELECT path_fetch_target"), Row(42))
+
+        // Set PATH to exclude system.session: unqualified FETCH INTO target now fails
+        // with the actual SQL path rendered as a bracketed list.
+        sql("SET PATH = spark_catalog.default")
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(
+              """
+                |BEGIN
+                |  DECLARE cur CURSOR FOR SELECT 99 AS val;
+                |  OPEN cur;
+                |  FETCH cur INTO path_fetch_target;
+                |  CLOSE cur;
+                |END;
+                |""".stripMargin)
+          },
+          condition = "UNRESOLVED_VARIABLE",
+          sqlState = "42883",
+          parameters = Map(
+            "variableName" -> "`path_fetch_target`",
+            "searchPath" -> "[`spark_catalog`.`default`]"),
+          context = ExpectedContext("path_fetch_target", -1, -1))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TEMPORARY VARIABLE IF EXISTS path_fetch_target")
+      }
+    }
+  }
+
+  test("PATH enabled: DECLARE / SET VAR / DROP cycle under non-default PATH") {
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_var_cycle")
+      try {
+        sql("SET PATH = spark_catalog.path_var_cycle, system.session")
+        sql("DECLARE OR REPLACE VARIABLE cycle_var = 1")
+        sql("SET VAR system.session.cycle_var = 2")
+        sql("SET VAR cycle_var = 3")
+        checkAnswer(sql("SELECT cycle_var"), Row(3))
+        sql("DROP TEMPORARY VARIABLE cycle_var")
+      } finally {
+        sql("DROP TEMPORARY VARIABLE IF EXISTS system.session.cycle_var")
+        sql("DROP SCHEMA IF EXISTS path_var_cycle")
+      }
+    }
+  }
+
+  test("PATH enabled: current_path does not accept arguments") {
+    withPathEnabled {
+      // Ensure built-in function lookup succeeds so this assertion targets arg-count semantics.
+      sql("SET PATH = DEFAULT_PATH")
+      val e = intercept[AnalysisException] {
+        sql("SELECT current_path(1)")
+      }
+      assert(e.getCondition == "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", e.getMessage)
+    }
+  }
+
   test("PATH enabled: DEFAULT_PATH respects sessionFunctionResolutionOrder = first") {
     withSQLConf(
       SQLConf.PATH_ENABLED.key -> "true",
@@ -379,9 +552,689 @@ class SetPathSuite extends SharedSparkSession {
     }
   }
 
-  // TODO: cloneSession() constructs a new CatalogManager per forked session and
-  // explicitly copies only the stored session path via copySessionPathFrom.
-  // Other CatalogManager state propagation (current catalog/namespace, registered
-  // catalogs) on clone is currently incidental — audit and pin down the intended
-  // semantics in a follow-up.
+  // --- cloneSession() propagation matrix --------------------------------------
+  // The cloned session is built via `BaseSessionStateBuilder` from a parent
+  // `SessionState`. Per-component hand-offs on clone:
+  //   - `SessionCatalog.copyStateTo` copies `currentDb` and `tempViews`,
+  //   - `CatalogManager.copySessionPathFrom` copies the stored `_sessionPath`,
+  //   - `functionRegistry.clone()` and `tableFunctionRegistry.clone()` copy
+  //     temporary functions.
+  // What is NOT propagated:
+  //   - the temp variable registry (new `TempVariableManager` per session),
+  //   - the `CatalogManager` current-catalog / current-namespace (re-read from
+  //     conf defaults in the child),
+  //   - the registered v2 `catalogs` map (lazy-loaded per session).
+  // The tests below pin this observed behavior so any future change has to
+  // update the assertions.
+
+  test("cloneSession: stored SET PATH propagates to the child session") {
+    withPathEnabled {
+      sql("SET PATH = spark_catalog.default, system.builtin")
+      try {
+        val child = spark.cloneSession()
+        val entries = pathEntries(
+          child.sql("SELECT current_path()").collect().head.getString(0))
+        assert(entries === Seq("spark_catalog.default", "system.builtin"),
+          s"Cloned session should inherit stored SET PATH; got: $entries")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+      }
+    }
+  }
+
+  test("cloneSession: USE SCHEMA on the parent propagates to the child") {
+    sql("CREATE SCHEMA IF NOT EXISTS path_clone_use")
+    try {
+      sql("USE spark_catalog.path_clone_use")
+      val child = spark.cloneSession()
+      val childDb = child.sql("SELECT current_database()").head().getString(0)
+      assert(childDb == "path_clone_use",
+        s"Cloned session should inherit the parent's current schema; got: $childDb")
+    } finally {
+      sql("USE spark_catalog.default")
+      sql("DROP SCHEMA IF EXISTS path_clone_use")
+    }
+  }
+
+  test("cloneSession: temp views on the parent propagate to the child") {
+    sql("CREATE TEMPORARY VIEW path_clone_view AS SELECT 1 AS c")
+    try {
+      val child = spark.cloneSession()
+      checkAnswer(child.sql("SELECT c FROM path_clone_view"), Row(1))
+    } finally {
+      sql("DROP VIEW IF EXISTS path_clone_view")
+    }
+  }
+
+  test("cloneSession: temp functions on the parent propagate to the child (cloned " +
+      "functionRegistry)") {
+    sql("CREATE TEMPORARY FUNCTION path_clone_fn() RETURNS INT RETURN 42")
+    try {
+      val child = spark.cloneSession()
+      checkAnswer(child.sql("SELECT path_clone_fn()"), Row(42))
+      // Snapshot semantics: dropping in the parent must not affect the already-cloned child.
+      sql("DROP TEMPORARY FUNCTION path_clone_fn")
+      checkAnswer(child.sql("SELECT path_clone_fn()"), Row(42))
+    } finally {
+      sql("DROP TEMPORARY FUNCTION IF EXISTS path_clone_fn")
+    }
+  }
+
+  test("cloneSession: temp variables on the parent are NOT propagated to the child") {
+    sql("DECLARE OR REPLACE VARIABLE path_clone_var INT DEFAULT 7")
+    try {
+      val child = spark.cloneSession()
+      val e = intercept[AnalysisException] {
+        child.sql("SELECT path_clone_var").collect()
+      }
+      // Either UNRESOLVED_VARIABLE or UNRESOLVED_COLUMN; both confirm the variable
+      // did not survive the clone.
+      assert(
+        e.getCondition == "UNRESOLVED_VARIABLE" ||
+          e.getCondition.startsWith("UNRESOLVED_COLUMN"),
+        s"Temp variables should NOT propagate to the clone; got: ${e.getCondition}")
+    } finally {
+      sql("DROP TEMPORARY VARIABLE IF EXISTS path_clone_var")
+    }
+  }
+
+  test("cloneSession: child SET PATH does not leak back to the parent") {
+    withPathEnabled {
+      sql("SET PATH = spark_catalog.default, system.builtin")
+      try {
+        val child = spark.cloneSession()
+        child.sql("SET PATH = system.session, system.builtin")
+        val parentEntries = pathEntries(currentPath())
+        assert(parentEntries === Seq("spark_catalog.default", "system.builtin"),
+          s"Child SET PATH must not affect the parent; parent got: $parentEntries")
+        val childEntries = pathEntries(
+          child.sql("SELECT current_path()").collect().head.getString(0))
+        assert(childEntries === Seq("system.session", "system.builtin"),
+          s"Child SET PATH should be visible only in the child; child got: $childEntries")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+      }
+    }
+  }
+
+  // --- Resolution tests: verify SET PATH affects actual table/function lookup ---
+
+  test("PATH enabled: table resolves from first matching path entry") {
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_res_a")
+      sql("CREATE SCHEMA IF NOT EXISTS path_res_b")
+      sql("CREATE TABLE path_res_a.tbl (x INT) USING parquet")
+      sql("CREATE TABLE path_res_b.tbl (x INT) USING parquet")
+      sql("INSERT INTO path_res_a.tbl VALUES (1)")
+      sql("INSERT INTO path_res_b.tbl VALUES (2)")
+      try {
+        sql("SET PATH = spark_catalog.path_res_a, spark_catalog.path_res_b, system.builtin")
+        checkAnswer(sql("SELECT x FROM tbl"), Row(1))
+        sql("SET PATH = spark_catalog.path_res_b, spark_catalog.path_res_a, system.builtin")
+        checkAnswer(sql("SELECT x FROM tbl"), Row(2))
+      } finally {
+        sql("DROP TABLE IF EXISTS path_res_a.tbl")
+        sql("DROP TABLE IF EXISTS path_res_b.tbl")
+        sql("DROP SCHEMA IF EXISTS path_res_a")
+        sql("DROP SCHEMA IF EXISTS path_res_b")
+      }
+    }
+  }
+
+  test("PATH enabled: function resolves from first matching path entry") {
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_fn_a")
+      sql("CREATE SCHEMA IF NOT EXISTS path_fn_b")
+      sql("CREATE FUNCTION path_fn_a.pick() RETURNS INT RETURN 1")
+      sql("CREATE FUNCTION path_fn_b.pick() RETURNS INT RETURN 2")
+      try {
+        sql("SET PATH = spark_catalog.path_fn_a, spark_catalog.path_fn_b, system.builtin")
+        checkAnswer(sql("SELECT pick()"), Row(1))
+        sql("SET PATH = spark_catalog.path_fn_b, spark_catalog.path_fn_a, system.builtin")
+        checkAnswer(sql("SELECT pick()"), Row(2))
+      } finally {
+        sql("DROP FUNCTION IF EXISTS path_fn_a.pick")
+        sql("DROP FUNCTION IF EXISTS path_fn_b.pick")
+        sql("DROP SCHEMA IF EXISTS path_fn_a")
+        sql("DROP SCHEMA IF EXISTS path_fn_b")
+      }
+    }
+  }
+
+  test("PATH enabled: unqualified table fails when schema not in path") {
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_miss")
+      sql("CREATE TABLE path_miss.hidden (x INT) USING parquet")
+      try {
+        sql("SET PATH = spark_catalog.default, system.builtin")
+        val err = intercept[AnalysisException] {
+          sql("SELECT * FROM hidden")
+        }
+        assert(err.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"),
+          s"Expected TABLE_OR_VIEW_NOT_FOUND, got: ${err.getMessage}")
+      } finally {
+        sql("DROP TABLE IF EXISTS path_miss.hidden")
+        sql("DROP SCHEMA IF EXISTS path_miss")
+      }
+    }
+  }
+
+  // --- spark.sql.defaultPath (SQLConf.DEFAULT_PATH) ---
+  // The conf carries the SET PATH grammar; sessionPathEntries falls back to it lazily
+  // when no `SET PATH` has been issued, mirroring how `currentCatalog` falls back to
+  // [[SQLConf.DEFAULT_CATALOG]].
+
+  test("DEFAULT_PATH conf: lazy fallback when no SET PATH issued") {
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "true",
+        SQLConf.DEFAULT_PATH.key -> "spark_catalog.default, system.builtin") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      catalogManager.clearSessionPath()
+      try {
+        val entries = pathEntries(currentPath())
+        assert(entries == Seq("spark_catalog.default", "system.builtin"),
+          s"Expected DEFAULT_PATH conf to drive current_path(); got: $entries")
+        assert(catalogManager.storedSessionPathEntries.isEmpty,
+          "DEFAULT_PATH lookup must not write to the in-memory stored session path")
+      } finally {
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("DEFAULT_PATH conf: explicit SET PATH overrides the conf") {
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "true",
+        SQLConf.DEFAULT_PATH.key -> "system.builtin, system.session") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      try {
+        sql("SET PATH = system.session, system.builtin")
+        val entries = pathEntries(currentPath())
+        assert(entries == Seq("system.session", "system.builtin"),
+          s"Expected SET PATH to win over DEFAULT_PATH conf; got: $entries")
+      } finally {
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("DEFAULT_PATH conf: SET PATH = DEFAULT_PATH expands to the conf value") {
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "true",
+        SQLConf.DEFAULT_PATH.key -> "system.session, system.builtin, current_schema") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      try {
+        sql("SET PATH = DEFAULT_PATH")
+        val entries = pathEntries(currentPath())
+        assert(entries.head.contains("system.session"),
+          s"DEFAULT_PATH expansion should follow conf order (session first); got: $entries")
+        assert(catalogManager.storedSessionPathEntries.isDefined,
+          "After SET PATH the in-memory stored session path should be populated")
+      } finally {
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("DEFAULT_PATH conf: cycle break -- inner DEFAULT_PATH falls back to builtin order") {
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "true",
+        SQLConf.DEFAULT_PATH.key -> "DEFAULT_PATH",
+        // Pin order conf to "first" so the spark-builtin default ordering is observable.
+        SQLConf.SESSION_FUNCTION_RESOLUTION_ORDER.key -> "first") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      catalogManager.clearSessionPath()
+      try {
+        val entries = pathEntries(currentPath())
+        assert(entries.head.contains("system.session"),
+          s"Inner DEFAULT_PATH should resolve to builtin order seeded by the order conf " +
+            s"('first' -> session leading); got: $entries")
+      } finally {
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("DEFAULT_PATH conf: invalid value rejected on SET spark.sql.defaultPath") {
+    withPathEnabled {
+      val e = intercept[SparkIllegalArgumentException] {
+        sql("SET spark.sql.defaultPath = this is not a path")
+      }
+      assert(e.getCondition.startsWith("INVALID_CONF_VALUE"), e.getMessage)
+    }
+  }
+
+  test("DEFAULT_PATH conf: PATH keyword is rejected on SET spark.sql.defaultPath") {
+    withPathEnabled {
+      val e = intercept[SparkIllegalArgumentException] {
+        sql("SET spark.sql.defaultPath = PATH, system.builtin")
+      }
+      assert(e.getCondition.startsWith("INVALID_CONF_VALUE"), e.getMessage)
+    }
+  }
+
+  test("DEFAULT_PATH conf: PATH disabled returns no fallback") {
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "false",
+        SQLConf.DEFAULT_PATH.key -> "system.session, system.builtin") {
+      val catalogManager = spark.sessionState.catalogManager
+      assert(catalogManager.sessionPathEntries.isEmpty,
+        "DEFAULT_PATH conf must not take effect when PATH is disabled")
+    }
+  }
+
+  // --- Path-driven security check (built on the lazy DEFAULT_PATH fallback) ---
+  // The "block temp function shadowing builtin" check is now driven by the live PATH, so
+  // changes via SET PATH or DEFAULT_PATH take effect even when the legacy order conf is
+  // left at its default.
+
+  test("path-driven security check: SET PATH putting session before builtin blocks temp " +
+      "function with a builtin name") {
+    withPathEnabled {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      try {
+        // Default `sessionFunctionResolutionOrder` is "second" (builtin first), but SET PATH
+        // overrides that to put session first. The security check must reflect the live path.
+        sql("SET PATH = system.session, system.builtin")
+        val e = intercept[AnalysisException] {
+          sql("CREATE TEMPORARY FUNCTION count() RETURNS INT RETURN 1")
+        }
+        assert(e.getCondition == "ROUTINE_ALREADY_EXISTS", e.getMessage)
+      } finally {
+        sql("DROP TEMPORARY FUNCTION IF EXISTS session.count")
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("path-driven security check: DEFAULT_PATH conf putting session before builtin " +
+      "blocks temp function with a builtin name (no SET PATH issued)") {
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "true",
+        SQLConf.DEFAULT_PATH.key -> "system.session, system.builtin") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      catalogManager.clearSessionPath()
+      try {
+        // Order conf is left at its default ("second"). The path-driven gate must read
+        // DEFAULT_PATH and fire the security check for unqualified temp/builtin collisions.
+        val e = intercept[AnalysisException] {
+          sql("CREATE TEMPORARY FUNCTION count() RETURNS INT RETURN 1")
+        }
+        assert(e.getCondition == "ROUTINE_ALREADY_EXISTS", e.getMessage)
+      } finally {
+        sql("DROP TEMPORARY FUNCTION IF EXISTS session.count")
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
+
+  test("PATH enabled: SET PATH with only user schemas does not implicitly resolve builtins") {
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS only_user_on_path")
+      try {
+        sql("SET PATH = spark_catalog.only_user_on_path")
+        val e = intercept[AnalysisException] {
+          sql("SELECT abs(-1)").collect()
+        }
+        assert(e.getCondition == "UNRESOLVED_ROUTINE", e.getMessage)
+      } finally {
+        sql("DROP SCHEMA IF EXISTS only_user_on_path")
+      }
+    }
+  }
+
+  test("PATH enabled: explicit SET PATH with system.session AFTER a user catalog still " +
+      "reaches temp functions") {
+    // Explicit paths are honored as written: placing `system.session` after a user catalog
+    // is the user's authorization for unqualified temp functions to resolve. Contrast with
+    // the implicit (no SET PATH, no DEFAULT_PATH) form, which preserves the security property
+    // of the seeded default path.
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_interleaved_user")
+      try {
+        sql("CREATE TEMPORARY FUNCTION path_interleaved_temp() RETURNS INT RETURN 7")
+        try {
+          sql("SET PATH = system.builtin, spark_catalog.path_interleaved_user, system.session")
+          checkAnswer(sql("SELECT path_interleaved_temp()"), Row(7))
+        } finally {
+          sql("DROP TEMPORARY FUNCTION IF EXISTS path_interleaved_temp")
+        }
+      } finally {
+        sql("DROP SCHEMA IF EXISTS path_interleaved_user")
+      }
+    }
+  }
+
+  test("PATH enabled: SET PATH with user schema before system.builtin still resolves builtins") {
+    // Exercises systemFunctionKindsFromPath with a user-catalog entry preceding
+    // system.builtin: the helper flat-scans the path, so Builtin still appears
+    // in the kinds list and unqualified `abs` resolves.
+    withPathEnabled {
+      sql("CREATE SCHEMA IF NOT EXISTS path_user_before_builtin")
+      try {
+        sql("SET PATH = spark_catalog.path_user_before_builtin, system.builtin")
+        // `abs` is a builtin; if Builtin did not appear in the kinds list,
+        // unqualified `abs(-1)` would fail with UNRESOLVED_ROUTINE.
+        checkAnswer(sql("SELECT abs(-1)"), Row(1))
+      } finally {
+        sql("DROP SCHEMA IF EXISTS path_user_before_builtin")
+      }
+    }
+  }
+
+  test("path-driven COUNT(*) rewrite gate: temp count shadowing builtin under SET PATH " +
+      "(session-first) suppresses the * -> 1 rewrite") {
+    // `Analyzer.matchesFunctionName` consults
+    // `FunctionResolution.isSessionBeforeBuiltinInPath` to decide whether COUNT(*) is the
+    // builtin (eligible for the COUNT(*) -> COUNT(1) shortcut) or a user-defined override.
+    // Default `sessionFunctionResolutionOrder` is "second", so creating a temp count while
+    // the default PATH is in effect passes the security check. Once SET PATH puts
+    // `system.session` before `system.builtin`, the rewrite must be suppressed and the
+    // star expansion must reach the temp `count`.
+    withPathEnabled {
+      sql("CREATE TEMPORARY FUNCTION count(x INT) RETURNS INT RETURN x + 100")
+      try {
+        // PATH still has builtin first: count(*) rewrites to count(1), which resolves to
+        // the builtin count and returns the row count of the input (1).
+        checkAnswer(sql("SELECT count(*) FROM VALUES (1) AS t(a)"), Row(1))
+
+        // Put session before builtin via SET PATH. The rewrite gate now reports
+        // `isSessionBeforeBuiltinInPath = true` AND a temp count exists, so the
+        // analyzer must NOT collapse `count(*)` to `count(1)`. The `*` then expands
+        // against the table's single column to `count(a)`, which resolves through
+        // the temp under the live path: 1 + 100 = 101.
+        sql("SET PATH = system.session, system.builtin")
+        checkAnswer(sql("SELECT count(*) FROM VALUES (1) AS t(a)"), Row(101))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TEMPORARY FUNCTION IF EXISTS count")
+      }
+    }
+  }
+
+  test("path-driven COUNT(*) rewrite gate: rewrite still applies for unrelated builtins") {
+    // The gate fires ONLY when a temp function with the same unqualified
+    // name as the builtin exists. A temp with a different name must not affect the
+    // COUNT(*) -> COUNT(1) shortcut even when session is searched before builtin.
+    withPathEnabled {
+      sql("CREATE TEMPORARY FUNCTION my_helper(x INT) RETURNS INT RETURN x + 1")
+      try {
+        sql("SET PATH = system.session, system.builtin")
+        // No temp `count` exists; the rewrite still fires and the builtin row counter
+        // returns the row count of the input (3).
+        checkAnswer(sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)"), Row(3))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TEMPORARY FUNCTION IF EXISTS my_helper")
+      }
+    }
+  }
+
+  test("path-driven COUNT(*) rewrite gate: single-pass resolver suppresses the rewrite " +
+      "under SET PATH (session-first)") {
+    // The single-pass resolver mirrors the fixed-point gate via
+    // `FunctionResolverUtils.isUnqualifiedCountShadowedByTemp`, which is wired into
+    // `isNonDistinctCount` and consulted by `handleStarInArguments`.
+    //
+    // Setup (`CREATE TEMPORARY FUNCTION`, `SET PATH`) and execution (Dataset collect via
+    // checkAnswer, which inserts a `DeserializeToObject` node the single-pass analyzer
+    // does not yet support) are run under the fixed-point analyzer; only the actual
+    // count(*) analysis is run under the single-pass analyzer, and we assert against the
+    // analyzed plan's output schema. The builtin count returns BIGINT (rewrite applied);
+    // the temp count(INT) returns INT (rewrite suppressed and the star expansion routes
+    // through the temp), so the schema's first-field dataType tells us which branch fired.
+    withPathEnabled {
+      sql("CREATE TEMPORARY FUNCTION count(x INT) RETURNS INT RETURN x + 100")
+      try {
+        val countStarSql = "SELECT count(*) FROM VALUES (1) AS t(a)"
+
+        // PATH builtin-first: the single-pass gate reports
+        // `isUnqualifiedCountShadowedByTemp = false`, the shortcut fires, and the analyzed
+        // output is the BIGINT builtin count.
+        withSQLConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true") {
+          val tpe = spark.sql(countStarSql).queryExecution.analyzed.schema.head.dataType
+          assert(tpe == LongType,
+            s"Expected BIGINT (builtin count rewrite); got: $tpe")
+        }
+
+        sql("SET PATH = system.session, system.builtin")
+
+        // PATH session-first: the gate reports true, the rewrite is suppressed, the star
+        // expands against `a`, and the temp count(INT) wins; analyzed output is INT.
+        withSQLConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true") {
+          val tpe = spark.sql(countStarSql).queryExecution.analyzed.schema.head.dataType
+          assert(tpe == IntegerType,
+            s"Expected INT (temp count; rewrite suppressed); got: $tpe")
+        }
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TEMPORARY FUNCTION IF EXISTS count")
+      }
+    }
+  }
+
+  test("SPARK-56939: concurrent USE SCHEMA / USE CATALOG and unqualified function lookups " +
+    "do not deadlock") {
+    // Regression for SPARK-56939. Prior to the fix, [[CatalogManager.setCurrentNamespace]]
+    // (driven by `USE SCHEMA`) and [[CatalogManager.setCurrentCatalog]] (driven by
+    // `USE CATALOG`) both held the manager's intrinsic lock while calling into
+    // [[SessionCatalog.setCurrentDatabase*]] (which takes the catalog's intrinsic lock),
+    // while concurrent unqualified function resolution acquired the catalog's intrinsic lock
+    // and then reached back into the manager via
+    // [[CatalogManager.sqlResolutionPathEntries]]. That lock-order inversion deadlocked the
+    // session whenever a `USE`-style command raced with any unqualified function reference.
+    //
+    // The hazard is independent of [[SQLConf.PATH_ENABLED]] and the resolution-order setting,
+    // so this test exercises the default configuration. Both `setCurrentNamespace` and
+    // `setCurrentCatalog` were rewritten with the same split-lock pattern, so the test
+    // exercises both arms symmetrically: one thread toggles `USE SCHEMA`, another toggles
+    // `USE CATALOG` between the session catalog and a registered v2 catalog.
+    val v2Catalog = "spark_56939_testcat"
+    spark.conf.set(s"spark.sql.catalog.$v2Catalog", classOf[InMemoryCatalog].getName)
+    sql("CREATE SCHEMA IF NOT EXISTS spark_56939_s1")
+    sql("CREATE SCHEMA IF NOT EXISTS spark_56939_s2")
+    try {
+      val budget = 200
+      val iterations = new java.util.concurrent.atomic.AtomicInteger(0)
+      val barrier = new java.util.concurrent.CyclicBarrier(3)
+      val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]()
+
+      val useSchemaThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            try {
+              sql(if ((i % 2) == 0) "USE SCHEMA spark_56939_s1" else "USE SCHEMA spark_56939_s2")
+            } catch {
+              // A concurrent `USE` from `useCatalogThread` may switch the current catalog
+              // to the v2 testcat, where these schemas don't exist; the resulting
+              // SCHEMA_NOT_FOUND is an expected interleaving and is unrelated to the
+              // deadlock this test guards against.
+              case _: NoSuchNamespaceException => ()
+            }
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SPARK-56939-use-schema")
+
+      val useCatalogThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            // Toggle between the session catalog and a v2 catalog so each iteration
+            // exercises `setCurrentCatalog` -- the arm that previously held the manager
+            // lock across `v1SessionCatalog.setCurrentDatabase(default)`. The grammar
+            // accepts `USE identifierReference`; a single identifier resolves to a
+            // catalog when one is registered under that name.
+            sql(if ((i % 2) == 0) s"USE $v2Catalog" else "USE spark_catalog")
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SPARK-56939-use-catalog")
+
+      val lookupThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            // Unqualified `count(*)` exercises the kinds-order provider that resolves
+            // against the live PATH via [[CatalogManager]] -- the side of the cycle
+            // that previously acquired the catalog lock first and then the manager lock.
+            val n = sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)")
+              .head().getLong(0)
+            assert(n == 3L, s"unexpected count: $n at iteration $i")
+            iterations.incrementAndGet()
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SPARK-56939-lookup")
+
+      useSchemaThread.start()
+      useCatalogThread.start()
+      lookupThread.start()
+
+      // Generous join: 30s is plenty for 200 cheap queries per thread and gives a
+      // clear failure signal if the implementation regresses into a deadlock.
+      val joinMillis = 30000L
+      useSchemaThread.join(joinMillis)
+      useCatalogThread.join(joinMillis)
+      lookupThread.join(joinMillis)
+
+      assert(!useSchemaThread.isAlive,
+        "USE SCHEMA thread did not finish; lock-order inversion between SessionCatalog and " +
+          "CatalogManager likely regressed (SPARK-56939).")
+      assert(!useCatalogThread.isAlive,
+        "USE CATALOG thread did not finish; lock-order inversion between SessionCatalog and " +
+          "CatalogManager likely regressed (SPARK-56939).")
+      assert(!lookupThread.isAlive,
+        "Lookup thread did not finish; lock-order inversion between SessionCatalog and " +
+          "CatalogManager likely regressed (SPARK-56939).")
+      assert(errors.isEmpty,
+        s"Concurrent lookups raised unexpected errors: ${errors.toArray.mkString("; ")}")
+      assert(iterations.get() > 0,
+        "Lookup thread never completed a query; suspect contention or deadlock.")
+    } finally {
+      sql("USE spark_catalog")
+      sql("USE SCHEMA default")
+      sql("DROP SCHEMA IF EXISTS spark_56939_s1 CASCADE")
+      sql("DROP SCHEMA IF EXISTS spark_56939_s2 CASCADE")
+      spark.conf.unset(s"spark.sql.catalog.$v2Catalog")
+    }
+  }
+
+  test("PATH enabled: concurrent SET PATH and unqualified lookups do not deadlock") {
+    // SessionCatalog.lookupBuiltinOrTempFunction is intentionally NOT
+    // synchronized on SessionCatalog because the path-driven kinds provider acquires
+    // CatalogManager.synchronized, and another thread holding that lock can call back
+    // into SessionCatalog (e.g. via setCurrentNamespace). This test hammers both sides
+    // concurrently: one thread flips SET PATH while another performs unqualified
+    // function lookups that go through the kinds provider. Within the budget we should
+    // observe no deadlock and no spurious analysis failures.
+    withPathEnabled {
+      val budget = 200
+      val iterations = new java.util.concurrent.atomic.AtomicInteger(0)
+      val barrier = new java.util.concurrent.CyclicBarrier(2)
+      val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]()
+
+      val setterThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            if ((i % 2) == 0) {
+              sql("SET PATH = spark_catalog.default, system.builtin")
+            } else {
+              sql("SET PATH = system.builtin, system.session, spark_catalog.default")
+            }
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SetPathSuite-setter")
+
+      val lookupThread = new Thread(() => {
+        try {
+          barrier.await()
+          var i = 0
+          while (i < budget && errors.isEmpty) {
+            // Forces unqualified function resolution against the live PATH and triggers
+            // the session-kinds provider on the catalog-manager side.
+            val n = sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)")
+              .head().getLong(0)
+            assert(n == 3L, s"unexpected count: $n at iteration $i")
+            iterations.incrementAndGet()
+            i += 1
+          }
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      }, "SetPathSuite-lookup")
+
+      setterThread.start()
+      lookupThread.start()
+
+      // Generous join: 30s is plenty for 200 cheap queries on either side and gives a
+      // clear failure signal if the implementation regresses into a deadlock.
+      val joinMillis = 30000L
+      setterThread.join(joinMillis)
+      lookupThread.join(joinMillis)
+
+      assert(!setterThread.isAlive,
+        "SET PATH thread did not finish; potential deadlock between SessionCatalog and " +
+          "CatalogManager synchronized blocks.")
+      assert(!lookupThread.isAlive,
+        "Lookup thread did not finish; potential deadlock between SessionCatalog and " +
+          "CatalogManager synchronized blocks.")
+      assert(errors.isEmpty,
+        s"Concurrent lookups raised unexpected errors: ${errors.toArray.mkString("; ")}")
+      assert(iterations.get() > 0,
+        "Lookup thread never completed a query; suspect contention or deadlock.")
+      sql("SET PATH = DEFAULT_PATH")
+    }
+  }
+
+  test("DEFAULT_PATH conf: duplicate entries are tolerated (first-match resolution)") {
+    // Lookup uses first-match resolution, so redundant entries on DEFAULT_PATH are dead code
+    // rather than an error. (Contrast with SET PATH, which still rejects static duplicates as
+    // a user-input typo guard.) This avoids a UX cliff where a USE SCHEMA could later wedge
+    // every unqualified function lookup with DUPLICATE_SQL_PATH_ENTRY.
+    withSQLConf(
+        SQLConf.PATH_ENABLED.key -> "true",
+        SQLConf.DEFAULT_PATH.key -> "system.builtin, system.builtin") {
+      val catalogManager = spark.sessionState.catalogManager
+      val priorSessionPath = catalogManager.storedSessionPathEntries
+      catalogManager.clearSessionPath()
+      try {
+        val entries = pathEntries(currentPath())
+        assert(entries == Seq("system.builtin", "system.builtin"),
+          s"DEFAULT_PATH duplicates should pass through to current_path(); got: $entries")
+        // Sanity: unqualified resolution still works (the second `system.builtin` is dead).
+        checkAnswer(sql("SELECT abs(-1)"), Row(1))
+      } finally {
+        catalogManager.clearSessionPath()
+        priorSessionPath.foreach(catalogManager.setSessionPath)
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala
index 127c9218d4b74..2d26356890d28 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala
@@ -185,6 +185,39 @@ class VariantEndToEndSuite extends SharedSparkSession {
     checkAnswer(variantDF, Seq(Row(expected)))
   }
 
+  test("SPARK-56654: parse_json/from_json reject unpaired UTF-16 surrogates by default") {
+    val invalidJson = "\"\\uD835\""
+    val df = Seq(invalidJson).toDF("j")
+    checkAnswer(df.selectExpr("try_parse_json(j)"), Seq(Row(null)))
+    checkAnswer(df.selectExpr("from_json(j, 'variant')"), Seq(Row(null)))
+    val parseJsonError = intercept[SparkException] {
+      df.selectExpr("parse_json(j)").collect()
+    }
+    checkError(
+      exception = parseJsonError,
+      condition = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
+      parameters = Map("badRecord" -> invalidJson, "failFastMode" -> "FAILFAST")
+    )
+
+    val fromJsonFailFast = intercept[SparkException] {
+      df.selectExpr("from_json(j, 'variant', map('mode', 'FAILFAST'))").collect()
+    }
+    checkError(
+      exception = fromJsonFailFast,
+      condition = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
+      parameters = Map("badRecord" -> "[null]", "failFastMode" -> "FAILFAST")
+    )
+
+    withSQLConf(SQLConf.VARIANT_VALIDATE_UNICODE_IN_JSON_PARSING.key -> "false") {
+      val parsed = df.selectExpr("parse_json(j)").collect()
+      assert(parsed.length == 1 && parsed.head.get(0) != null,
+        "legacy mode should accept unpaired surrogates")
+      val tryParsed = df.selectExpr("try_parse_json(j)").collect()
+      assert(tryParsed.length == 1 && tryParsed.head.get(0) != null,
+        "legacy mode should accept unpaired surrogates via try_parse_json")
+    }
+  }
+
   test("to_variant_object - Codegen Support") {
     Seq("CODEGEN_ONLY", "NO_CODEGEN").foreach { codegenMode =>
       withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> codegenMode) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala
index 30f587f6480d9..458e8be1dfe0e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala
@@ -1219,6 +1219,7 @@ class NameScopeSuite extends SharedSparkSession {
 
   private def newNameScopeStack() = new NameScopeStack(
     tempVariableManager = spark.sessionState.analyzer.catalogManager.tempVariableManager,
+    catalogManager = spark.sessionState.analyzer.catalogManager,
     subqueryRegistry = new SubqueryRegistry
   )
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
index d0efde8e11b18..a6f4de1c80e3e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
@@ -47,6 +47,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
   private val collationNonPreservingSources = Seq("orc", "csv", "json", "text")
   private val allFileBasedDataSources = collationPreservingSources ++  collationNonPreservingSources
   private val fullyQualifiedPrefix = s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}."
+  private val collations = Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI")
 
   @inline
   private def isSortMergeForced: Boolean = {
@@ -2738,4 +2739,397 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
         |)""".stripMargin
     checkAnswer(sql(q), Seq(Row(1L), Row(1L), Row(0L)))
   }
+
+  test("execute immediate parameter with explicit COLLATE has implicit strength") {
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+             | USING 'hello' COLLATE UNICODE""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+             | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT ? = "HELLO" COLLATE $collation'
+             | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+        Row(collation == "UTF8_LCASE" || collation == "UNICODE_CI"))
+    }
+  }
+
+  test("execute immediate parameter without explicit COLLATE") {
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COLLATION(? || "world")'
+          | USING 'hello'""".stripMargin),
+      Row(s"${fullyQualifiedPrefix}UTF8_BINARY"))
+
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(? || "world" COLLATE $collation)'
+             | USING 'hello'""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+    }
+  }
+
+  test("execute immediate parameter implicit vs column collation") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT ? = lcase_col FROM t'
+            | USING 'hello' COLLATE UTF8_LCASE""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT ? = unicode_col FROM t'
+              | USING 'hello' COLLATE UTF8_LCASE""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 21, "? = unicode_col")))
+    }
+  }
+
+  test("execute immediate complex type parameter collation and strength") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT ?[0] = lcase_col FROM t'
+            | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT ?[0] = unicode_col FROM t'
+              | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(array(hello)[0] = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 24, "?[0] = unicode_col")))
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT element_at(?, 1) = lcase_col FROM t'
+            | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT element_at(?, 1) = unicode_col FROM t'
+              | USING ARRAY('hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(element_at(array(hello), 1) = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 36,
+            "element_at(?, 1) = unicode_col")))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT element_at(?, "key") = lcase_col FROM t'
+             | USING MAP('key', 'hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            s"""EXECUTE IMMEDIATE
+               | 'SELECT element_at(?, "key") = unicode_col FROM t'
+               | USING MAP('key', 'hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(element_at(map(key, hello), key) = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext(
+            "EXECUTE IMMEDIATE", "", 7, 40,
+            """element_at(?, "key") = unicode_col""")))
+
+      checkAnswer(
+        sql(
+          """EXECUTE IMMEDIATE
+            | 'SELECT ?.f1 = lcase_col FROM t'
+            | USING NAMED_STRUCT('f1', 'hello' COLLATE UTF8_LCASE)""".stripMargin),
+        Row(true))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE
+              | 'SELECT ?.f1 = unicode_col FROM t'
+              | USING NAMED_STRUCT('f1', 'hello' COLLATE UTF8_LCASE)""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(named_struct(f1, hello).f1 = unicode_col)\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 24, "?.f1 = unicode_col")))
+    }
+  }
+
+  test("execute immediate complex type parameter with explicit COLLATE") {
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT COLLATION(?[0])'
+             | USING ARRAY('hello' COLLATE $collation)""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT COLLATION(element_at(?, "value"))'
+             | USING MAP('value', 'hello' COLLATE $collation)""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE 'SELECT COLLATION(?.f1)'
+             | USING NAMED_STRUCT('f1', 'hello' COLLATE $collation)""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+    }
+  }
+
+  test("execute immediate variable parameter preserves collation") {
+    collations.foreach { collation =>
+      withSessionVariable("v1") {
+        sql(s"DECLARE VARIABLE v1 STRING COLLATE $collation DEFAULT 'hello'")
+        checkAnswer(
+          sql("EXECUTE IMMEDIATE 'SELECT COLLATION(?)' USING v1"),
+          Row(s"$fullyQualifiedPrefix$collation"))
+      }
+    }
+  }
+
+  test("execute immediate variable parameter has implicit strength") {
+    collations.foreach { collation =>
+      withSessionVariable("v1") {
+        sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+        checkAnswer(
+          sql(
+            s"""EXECUTE IMMEDIATE 'SELECT ? = "HELLO" COLLATE $collation'
+               | USING v1""".stripMargin),
+          Row(collation == "UTF8_LCASE" || collation == "UNICODE_CI"))
+      }
+    }
+  }
+
+  test("execute immediate variable parameter implicit vs column collation") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      withSessionVariable("v1") {
+        sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+        checkAnswer(
+          sql("EXECUTE IMMEDIATE 'SELECT ? = lcase_col FROM t' USING v1"),
+          Row(true))
+
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("EXECUTE IMMEDIATE 'SELECT ? = unicode_col FROM t' USING v1")
+          },
+          condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+          parameters = Map("expr" ->
+            "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = unicode_col)\""),
+          queryContext = Array(
+            ExpectedContext("EXECUTE IMMEDIATE", "", 7, 21, "? = unicode_col")))
+      }
+    }
+  }
+
+  test("execute immediate two parameters with different collations") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql(
+          """EXECUTE IMMEDIATE 'SELECT ? = ?'
+            | USING 'hello' COLLATE UTF8_LCASE, 'hello' COLLATE UNICODE""".stripMargin)
+      },
+      condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+      parameters = Map("expr" ->
+        "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = CAST(hello AS STRING COLLATE UNICODE))\""),
+      queryContext = Array(
+        ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+
+    withSessionVariable("v1", "v2") {
+      sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE DEFAULT 'hello'")
+      sql("DECLARE VARIABLE v2 STRING COLLATE UNICODE DEFAULT 'hello'")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("EXECUTE IMMEDIATE 'SELECT ? = ?' USING v1, v2")
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(CAST(hello AS STRING COLLATE UTF8_LCASE) = CAST(hello AS STRING COLLATE UNICODE))\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+    }
+
+    withSessionVariable("v1") {
+      sql("DECLARE VARIABLE v1 STRING COLLATE UNICODE DEFAULT 'hello'")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """EXECUTE IMMEDIATE 'SELECT ? = ?'
+              | USING v1, 'hello' COLLATE UTF8_LCASE""".stripMargin)
+        },
+        condition = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+        parameters = Map("expr" ->
+          "\"(CAST(hello AS STRING COLLATE UNICODE) = CAST(hello AS STRING COLLATE UTF8_LCASE))\""),
+        queryContext = Array(
+          ExpectedContext("EXECUTE IMMEDIATE", "", 7, 11, "? = ?")))
+    }
+  }
+
+  test("execute immediate null parameter with collation") {
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COLLATION(COALESCE(?, "hello"))'
+          | USING NULL""".stripMargin),
+      Row(s"${fullyQualifiedPrefix}UTF8_BINARY"))
+
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COALESCE(?, "hello") = "hello"'
+          | USING NULL""".stripMargin),
+      Row(true))
+
+    withSessionVariable("v1") {
+      sql("DECLARE VARIABLE v1 STRING COLLATE UTF8_LCASE")
+      checkAnswer(
+        sql("EXECUTE IMMEDIATE 'SELECT ?, COLLATION(?)' USING v1, v1"),
+        Row(null, s"${fullyQualifiedPrefix}UTF8_LCASE"))
+    }
+
+    // Both COALESCE sides have default strength but different collations, so the result
+    // is the IndeterminateCollation, whose name is "null".
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE 'SELECT COLLATION(COALESCE(?, "hello"))'
+          | USING CAST(NULL AS STRING COLLATE UNICODE)""".stripMargin),
+      Row("null"))
+
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  lcase_col STRING COLLATE UTF8_LCASE,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COALESCE(?, lcase_col) FROM t'
+             | USING CAST(NULL AS STRING COLLATE UTF8_LCASE)""".stripMargin),
+        Row("hello"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COALESCE(?, unicode_col) FROM t'
+             | USING CAST(NULL AS STRING COLLATE UTF8_LCASE)""".stripMargin),
+        Row("hello"))
+
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COALESCE(?, unicode_col) FROM t'
+             | USING NULL""".stripMargin),
+        Row("hello"))
+    }
+  }
+
+  test("execute immediate named parameter collation strength") {
+    collations.foreach { collation =>
+      checkAnswer(
+        sql(
+          s"""EXECUTE IMMEDIATE
+             | 'SELECT COLLATION(:p || "world" COLLATE $collation)'
+             | USING 'hello' COLLATE UNICODE AS p""".stripMargin),
+        Row(s"$fullyQualifiedPrefix$collation"))
+    }
+
+    checkAnswer(
+      sql(
+        """EXECUTE IMMEDIATE
+          | 'SELECT :p = "HELLO" COLLATE UTF8_LCASE'
+          | USING 'hello' COLLATE UNICODE AS p""".stripMargin),
+      Row(true))
+  }
+
+  test("parameterized query vs column collation") {
+    withTable("t") {
+      sql(
+        """CREATE TABLE t (
+          |  binary_col STRING,
+          |  unicode_col STRING COLLATE UNICODE
+          |) USING parquet""".stripMargin)
+      sql("INSERT INTO t VALUES ('hello', 'hello')")
+
+      checkAnswer(
+        spark.sql(
+          "SELECT :p = binary_col FROM t",
+          Map("p" -> "hello")),
+        Row(true))
+
+      checkAnswer(
+        spark.sql(
+          "SELECT :p = unicode_col FROM t",
+          Map("p" -> "hello")),
+        Row(true))
+    }
+  }
+
+  test("parameterized query collation strength") {
+    checkAnswer(
+      spark.sql(
+        "SELECT :p = 'HELLO', :p = 'HELLO' COLLATE UTF8_LCASE",
+        Map("p" -> "hello")),
+      Row(false, true))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
index 670f4540a810b..82bb616480233 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
@@ -1556,6 +1556,23 @@ abstract class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite {
     }
   }
 
+  testString("ALTER SCHEMA DEFAULT COLLATION does not retroactively change a view's collation") {
+    _ =>
+    withDatabase(testSchema) {
+      sql(s"CREATE SCHEMA $testSchema")
+      sql(s"USE $testSchema")
+      withView(testView) {
+        sql(s"CREATE VIEW $testView AS SELECT 'a' AS c1")
+        assertTableColumnCollation(testView, "c1", "UTF8_BINARY")
+
+        sql(s"ALTER SCHEMA $testSchema DEFAULT COLLATION UTF8_LCASE")
+        sql(s"ALTER VIEW $testView AS SELECT 'x' AS c1, 'y' AS c2")
+        assertTableColumnCollation(testView, "c1", "UTF8_BINARY")
+        assertTableColumnCollation(testView, "c2", "UTF8_BINARY")
+      }
+    }
+  }
+
   private def testCreateViewWithSchemaLevelCollation(
       dataType: String,
       schemaDefaultCollation: String,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AppendDataTransactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AppendDataTransactionSuite.scala
new file mode 100644
index 0000000000000..aef9c65550fc4
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AppendDataTransactionSuite.scala
@@ -0,0 +1,500 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.connector.catalog.{Aborted, Committed}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode
+import org.apache.spark.sql.sources
+
+class AppendDataTransactionSuite extends RowLevelOperationSuiteBase {
+
+  test("writeTo append with transactional checks") {
+    // create table with initial data
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    // create a source on top of itself that will be fully resolved and analyzed
+    val sourceDF = spark.table(tableNameAsString)
+      .where("pk == 1")
+      .select(col("pk") + 10 as "pk", col("salary"), col("dep"))
+    sourceDF.queryExecution.assertAnalyzed()
+
+    // append data using the DataFrame API
+    val (txn, txnTables) = executeTransaction {
+      sourceDF.writeTo(tableNameAsString).append()
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size === 1)
+    assert(table.version() === "2")
+
+    // check the source scan was tracked via the transaction catalog
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size === 1)
+    assert(targetTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("pk", 1) => true
+      case _ => false
+    })
+
+    // check data was appended correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software"),
+        Row(11, 100, "hr"))) // appended
+  }
+
+  test("SQL INSERT INTO with transactional checks") {
+    // create table with initial data
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    // SQL INSERT INTO using VALUES
+    val (txn, txnTables) = executeTransaction {
+      sql(s"INSERT INTO $tableNameAsString VALUES (3, 300, 'hr'), (4, 400, 'finance')")
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(table.version() === "2")
+
+    // VALUES literal - No catalog tables were scanned
+    assert(txnTables.isEmpty)
+
+    // check data was inserted correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software"),
+        Row(3, 300, "hr"),
+        Row(4, 400, "finance")))
+  }
+
+  for (isDynamic <- Seq(false, true))
+  test(s"SQL INSERT OVERWRITE with transactional checks - isDynamic: $isDynamic") {
+    // create table with initial data; table is partitioned by dep
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    val insertOverwrite = if (isDynamic) {
+      // OverwritePartitionsDynamic
+      s"""INSERT OVERWRITE $tableNameAsString
+         |SELECT pk + 10, salary, dep FROM $tableNameAsString WHERE dep = 'hr'
+         |""".stripMargin
+    } else {
+      // OverwriteByExpression
+      s"""INSERT OVERWRITE $tableNameAsString
+         |PARTITION (dep = 'hr')
+         |SELECT pk + 10, salary FROM $tableNameAsString WHERE dep = 'hr'
+         |""".stripMargin
+    }
+
+    val confValue = if (isDynamic) PartitionOverwriteMode.DYNAMIC else PartitionOverwriteMode.STATIC
+    val (txn, txnTables) = withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> confValue.toString) {
+      executeTransaction { sql(insertOverwrite) }
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(table.version() === "2")
+
+    // the SELECT reads from the target table once with a dep='hr' filter
+    assert(txnTables.size == 1)
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size == 1)
+    assert(targetTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(2, 200, "software"),  // unchanged
+        Row(11, 100, "hr"),       // overwritten
+        Row(13, 300, "hr")))      // overwritten
+  }
+
+  test("writeTo overwrite with transactional checks") {
+    // create table with initial data; table is partitioned by dep
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // overwrite using a condition that covers the hr partition -> OverwriteByExpression
+    val sourceDF = spark.createDataFrame(Seq((11, 999, "hr"), (12, 888, "hr"))).
+      toDF("pk", "salary", "dep")
+
+    val (txn, txnTables) = executeTransaction {
+      sourceDF.writeTo(tableNameAsString).overwrite(col("dep") === "hr")
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(table.version() === "2")
+
+    // literal DataFrame source - no catalog tables were scanned
+    assert(txnTables.isEmpty)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(2, 200, "software"),  // unchanged
+        Row(11, 999, "hr"),       // overwrote hr partition
+        Row(12, 888, "hr")))      // overwrote hr partition
+  }
+
+  test("writeTo overwritePartitions with transactional checks") {
+    // create table with initial data; table is partitioned by dep
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // overwrite partitions dynamically -> OverwritePartitionsDynamic
+    val sourceDF = spark.createDataFrame(Seq((11, 999, "hr"), (12, 888, "hr"))).
+      toDF("pk", "salary", "dep")
+
+    val (txn, txnTables) = executeTransaction {
+      sourceDF.writeTo(tableNameAsString).overwritePartitions()
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(table.version() === "2")
+
+    // literal DataFrame source - no catalog tables were scanned
+    assert(txnTables.isEmpty)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(2, 200, "software"),  // unchanged
+        Row(11, 999, "hr"),       // overwrote hr partition
+        Row(12, 888, "hr")))      // overwrote hr partition
+  }
+
+  test("SQL INSERT INTO SELECT with transactional checks") {
+    // create table with initial data
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // SQL INSERT INTO using SELECT from the same table (self-insert)
+    val (txn, txnTables) = executeTransaction {
+      sql(s"""INSERT INTO $tableNameAsString
+             |SELECT pk + 10, salary, dep FROM $tableNameAsString WHERE dep = 'hr'
+             |""".stripMargin)
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(table.version() === "2")
+
+    // the SELECT reads from the target table once with a dep='hr' filter
+    assert(txnTables.size === 1)
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size === 1)
+    assert(targetTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    // check data was inserted correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software"),
+        Row(3, 300, "hr"),
+        Row(11, 100, "hr"), // inserted from pk=1
+        Row(13, 300, "hr"))) // inserted from pk=3
+  }
+
+  test("SQL INSERT INTO SELECT with subquery on source table and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 500, 'hr'), (3, 600, 'software')")
+
+    // INSERT using a subquery that reads from the target to filter source rows
+    // both tables are scanned through the transaction catalog
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""INSERT INTO $tableNameAsString
+           |SELECT pk + 10, salary, dep FROM $sourceNameAsString
+           |WHERE pk IN (SELECT pk FROM $tableNameAsString WHERE dep = 'hr')
+           |""".stripMargin)
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size === 2)
+    assert(table.version() === "2")
+
+    // target was scanned via the transaction catalog (IN subquery) once with dep='hr' filter
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size === 1)
+    assert(targetTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    // source was scanned via the transaction catalog exactly once (no filter)
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    assert(sourceTxnTable.scanEvents.size === 1)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software"),
+        Row(11, 500, "hr"))) // inserted: source pk=1 matched target hr row
+  }
+
+  test("SQL INSERT INTO SELECT with CTE and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 500, 'hr'), (3, 600, 'software')")
+
+    // CTE reads from target; INSERT selects from source filtered by the CTE result
+    // both tables are scanned through the transaction catalog
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""WITH hr_pks AS (SELECT pk FROM $tableNameAsString WHERE dep = 'hr')
+           |INSERT INTO $tableNameAsString
+           |SELECT pk + 10, salary, dep FROM $sourceNameAsString
+           |WHERE pk IN (SELECT pk FROM hr_pks)
+           |""".stripMargin)
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size === 2)
+    assert(table.version() === "2")
+
+    // target was scanned via the transaction catalog (CTE) once with dep='hr' filter
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size === 1)
+    assert(targetTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    // source was scanned via the transaction catalog exactly once (no filter)
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    assert(sourceTxnTable.scanEvents.size === 1)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software"),
+        Row(11, 500, "hr"))) // inserted: source pk=1 matched target hr row via CTE
+  }
+
+  test("SQL INSERT with analysis failure and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    val e = intercept[AnalysisException] {
+      sql(s"INSERT INTO $tableNameAsString SELECT nonexistent_col FROM $tableNameAsString")
+    }
+
+    assert(e.getMessage.contains("nonexistent_col"))
+    assert(catalog.lastTransaction.currentState === Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  for (isDynamic <- Seq(false, true))
+  test(s"SQL INSERT OVERWRITE with analysis failure and transactional checks" +
+      s"isDynamic: $isDynamic") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    val insertOverwrite = if (isDynamic) {
+      s"""INSERT OVERWRITE $tableNameAsString
+         |SELECT nonexistent_col, salary, dep FROM $tableNameAsString WHERE dep = 'hr'
+         |""".stripMargin
+    } else {
+      s"""INSERT OVERWRITE $tableNameAsString
+         |PARTITION (dep = 'hr')
+         |SELECT nonexistent_col FROM $tableNameAsString WHERE dep = 'hr'
+         |""".stripMargin
+    }
+
+    val confValue = if (isDynamic) PartitionOverwriteMode.DYNAMIC else PartitionOverwriteMode.STATIC
+    val e = withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> confValue.toString) {
+      intercept[AnalysisException] { sql(insertOverwrite) }
+    }
+
+    assert(e.getMessage.contains("nonexistent_col"))
+    assert(catalog.lastTransaction.currentState === Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("EXPLAIN INSERT SQL with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(s"EXPLAIN INSERT INTO $tableNameAsString VALUES (3, 300, 'hr')")
+
+    // EXPLAIN should not start a transaction
+    assert(catalog.transaction === null)
+
+    // INSERT was not executed; data is unchanged
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software")))
+  }
+
+  test("SQL INSERT WITH SCHEMA EVOLUTION adds new column with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(
+      s"""CREATE TABLE $sourceNameAsString
+         |(pk INT NOT NULL, salary INT, dep STRING, active BOOLEAN)""".stripMargin)
+    sql(s"INSERT INTO $sourceNameAsString VALUES (3, 300, 'hr', true), (4, 400, 'software', false)")
+
+    val (txn, txnTables) = executeTransaction {
+      sql(s"INSERT WITH SCHEMA EVOLUTION INTO $tableNameAsString SELECT * FROM $sourceNameAsString")
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+
+    // the new column must be visible in the committed delegate's schema
+    assert(table.schema.fieldNames.toSeq === Seq("pk", "salary", "dep", "active"))
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr", null),      // pre-existing rows: active is null
+        Row(2, 200, "software", null),
+        Row(3, 300, "hr", true),      // inserted with active
+        Row(4, 400, "software", false)))
+  }
+
+  for (isDynamic <- Seq(false, true))
+  test(s"SQL INSERT OVERWRITE WITH SCHEMA EVOLUTION adds new column with transactional checks " +
+      s"isDynamic: $isDynamic") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(
+      s"""CREATE TABLE $sourceNameAsString
+         |(pk INT NOT NULL, salary INT, dep STRING, active BOOLEAN)""".stripMargin)
+    sql(s"INSERT INTO $sourceNameAsString VALUES (11, 999, 'hr', true), (12, 888, 'hr', false)")
+
+    val insertOverwrite = if (isDynamic) {
+      s"""INSERT WITH SCHEMA EVOLUTION OVERWRITE TABLE $tableNameAsString
+         |SELECT * FROM $sourceNameAsString
+         |""".stripMargin
+    } else {
+      s"""INSERT WITH SCHEMA EVOLUTION OVERWRITE TABLE $tableNameAsString
+         |PARTITION (dep = 'hr')
+         |SELECT pk, salary, active FROM $sourceNameAsString
+         |""".stripMargin
+    }
+
+    val confValue = if (isDynamic) PartitionOverwriteMode.DYNAMIC else PartitionOverwriteMode.STATIC
+    val (txn, _) = withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> confValue.toString) {
+      executeTransaction { sql(insertOverwrite) }
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+
+    // the new column must be visible in the committed delegate's schema
+    assert(table.schema.fieldNames.contains("active"))
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(2, 200, "software", null), // unchanged (different partition)
+        Row(11, 999, "hr", true),      // overwrote hr partition
+        Row(12, 888, "hr", false)))
+  }
+
+  test("SQL INSERT WITH SCHEMA EVOLUTION analysis failure aborts transaction") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(
+      s"""CREATE TABLE $sourceNameAsString
+         |(pk INT NOT NULL, salary INT, dep STRING, active BOOLEAN)""".stripMargin)
+
+    val e = intercept[AnalysisException] {
+      sql(
+        s"""INSERT WITH SCHEMA EVOLUTION INTO $tableNameAsString
+           |SELECT nonexistent_col FROM $sourceNameAsString
+           |""".stripMargin)
+    }
+
+    assert(e.getMessage.contains("nonexistent_col"))
+    assert(catalog.lastTransaction.currentState === Aborted)
+    assert(catalog.lastTransaction.isClosed)
+    // schema must be unchanged after the aborted transaction
+    assert(table.schema.fieldNames.toSeq === Seq("pk", "salary", "dep"))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/CTASRTASTransactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/CTASRTASTransactionSuite.scala
new file mode 100644
index 0000000000000..8acdd8242ef1f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/CTASRTASTransactionSuite.scala
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.connector.catalog.{Aborted, Committed, Identifier, InMemoryRowLevelOperationTable}
+import org.apache.spark.sql.sources
+
+class CTASRTASTransactionSuite extends RowLevelOperationSuiteBase {
+
+  private val newTableNameAsString = "cat.ns1.new_table"
+
+  private def newTable: InMemoryRowLevelOperationTable =
+    catalog.loadTable(Identifier.of(Array("ns1"), "new_table"))
+      .asInstanceOf[InMemoryRowLevelOperationTable]
+
+  test("CTAS with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    val (txn, txnTables) = executeTransaction {
+      sql(s"""CREATE TABLE $newTableNameAsString
+             |AS SELECT * FROM $tableNameAsString WHERE dep = 'hr'
+             |""".stripMargin)
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size === 1)
+    assert(table.version() === "1")    // source table: read-only, version unchanged
+    assert(newTable.version() === "1") // target table: newly created and written
+
+    // the source table was scanned once through the transaction catalog with a dep='hr' filter
+    val sourceTxnTable = txnTables(tableNameAsString)
+    assert(sourceTxnTable.scanEvents.size === 1)
+    assert(sourceTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    checkAnswer(
+      sql(s"SELECT * FROM $newTableNameAsString"),
+      Seq(Row(1, 100, "hr")))
+  }
+
+  test("CTAS from literal source with transactional checks") {
+    // no source catalog table involved — the query is a pure literal SELECT
+    val (txn, txnTables) = executeTransaction {
+      sql(s"CREATE TABLE $newTableNameAsString AS SELECT 1 AS pk, 100 AS salary, 'hr' AS dep")
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+
+    // literal SELECT - no catalog tables were scanned
+    assert(txnTables.isEmpty)
+    assert(newTable.version() === "1") // target table: newly created and written
+
+    checkAnswer(
+      sql(s"SELECT * FROM $newTableNameAsString"),
+      Seq(Row(1, 100, "hr")))
+  }
+
+  test("CTAS with analysis failure and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    val e = intercept[AnalysisException] {
+      sql(s"CREATE TABLE $newTableNameAsString AS SELECT nonexistent_col FROM $tableNameAsString")
+    }
+
+    assert(e.getMessage.contains("nonexistent_col"))
+    assert(catalog.lastTransaction.currentState === Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("RTAS with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // pre-create the target so REPLACE TABLE (not CREATE OR REPLACE) is valid
+    sql(s"CREATE TABLE $newTableNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+
+    val (txn, txnTables) = executeTransaction {
+      sql(s"""REPLACE TABLE $newTableNameAsString
+             |AS SELECT * FROM $tableNameAsString WHERE dep = 'hr'
+             |""".stripMargin)
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size === 1)
+    assert(table.version() === "1")    // source table: read-only, version unchanged
+    assert(newTable.version() === "1") // target table: replaced and written
+
+    // the source table was scanned once through the transaction catalog with a dep='hr' filter
+    val sourceTxnTable = txnTables(tableNameAsString)
+    assert(sourceTxnTable.scanEvents.size === 1)
+    assert(sourceTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    checkAnswer(
+      sql(s"SELECT * FROM $newTableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(3, 300, "hr")))
+  }
+
+  test("RTAS self-reference with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // source and target are the same table: reads the old snapshot via TxnTable,
+    // replaces the table with a filtered version
+    val (txn, txnTables) = executeTransaction {
+      sql(s"""CREATE OR REPLACE TABLE $tableNameAsString
+             |AS SELECT * FROM $tableNameAsString WHERE dep = 'hr'
+             |""".stripMargin)
+    }
+
+    assert(txn.currentState === Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size === 1)
+    assert(table.version() === "1") // source/target table: replaced in place, version reset to 1
+
+    // the source/target table was scanned once with a dep='hr' filter
+    val sourceTxnTable = txnTables(tableNameAsString)
+    assert(sourceTxnTable.scanEvents.size === 1)
+    assert(sourceTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(3, 300, "hr")))
+  }
+
+  test("RTAS with analysis failure and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    val e = intercept[AnalysisException] {
+      sql(s"""CREATE OR REPLACE TABLE $tableNameAsString
+             |AS SELECT nonexistent_col FROM $tableNameAsString
+             |""".stripMargin)
+    }
+
+    assert(e.getMessage.contains("nonexistent_col"))
+    assert(catalog.lastTransaction.currentState === Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("simple CREATE TABLE and DROP TABLE do not create transactions") {
+    sql(s"CREATE TABLE $newTableNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    assert(catalog.transaction === null)
+    assert(catalog.lastTransaction === null)
+
+    sql(s"DROP TABLE $newTableNameAsString")
+    assert(catalog.transaction === null)
+    assert(catalog.lastTransaction === null)
+  }
+
+  test("EXPLAIN CTAS with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(s"""EXPLAIN CREATE TABLE $newTableNameAsString
+           |AS SELECT * FROM $tableNameAsString WHERE dep = 'hr'
+           |""".stripMargin)
+
+    // EXPLAIN should not start a transaction
+    assert(catalog.transaction === null)
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogEndToEndSuite.scala
index 006b645193023..bb40cd9874d21 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogEndToEndSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogEndToEndSuite.scala
@@ -25,6 +25,8 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.NamedStreamingRelation
 import org.apache.spark.sql.catalyst.streaming.UserProvided
 import org.apache.spark.sql.connector.catalog._
+import org.apache.spark.sql.connector.catalog.Changelog.{
+  CHANGE_TYPE_DELETE, CHANGE_TYPE_INSERT, CHANGE_TYPE_UPDATE_POSTIMAGE, CHANGE_TYPE_UPDATE_PREIMAGE}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.{LongType, StringType}
@@ -93,12 +95,12 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() returns change data") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "delete", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_DELETE, 2L, 2000000L)))
 
     val expected = Seq(
-      Row(1L, "a", "insert", 1L, new Timestamp(1000L)),
-      Row(2L, "b", "delete", 2L, new Timestamp(2000L)))
+      Row(1L, "a", CHANGE_TYPE_INSERT, 1L, new Timestamp(1000L)),
+      Row(2L, "b", CHANGE_TYPE_DELETE, 2L, new Timestamp(2000L)))
 
     // DataFrame API
     checkAnswer(
@@ -116,13 +118,13 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() with open-ended version range") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 2L, 2000000L),
-      makeChangeRow(3L, "c", "insert", 3L, 3000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 2L, 2000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 3L, 3000000L)))
 
     val expected = Seq(
-      Row(2L, "b", "insert", 2L, new Timestamp(2000L)),
-      Row(3L, "c", "insert", 3L, new Timestamp(3000L)))
+      Row(2L, "b", CHANGE_TYPE_INSERT, 2L, new Timestamp(2000L)),
+      Row(3L, "c", CHANGE_TYPE_INSERT, 3L, new Timestamp(3000L)))
 
     // DataFrame API
     checkAnswer(
@@ -157,12 +159,12 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() select CDC metadata columns") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "delete", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_DELETE, 2L, 2000000L)))
 
     val expected = Seq(
-      Row(1L, "insert", 1L),
-      Row(2L, "delete", 2L))
+      Row(1L, CHANGE_TYPE_INSERT, 1L),
+      Row(2L, CHANGE_TYPE_DELETE, 2L))
 
     // DataFrame API
     checkAnswer(
@@ -179,9 +181,9 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() with projection and filter") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 1L, 1000000L),
-      makeChangeRow(1L, "a2", "insert", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(1L, "a2", CHANGE_TYPE_INSERT, 2L, 2000000L)))
 
     val expected = Seq(Row(1L, "a2"))
 
@@ -200,13 +202,13 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() with aggregation on change types") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 1L, 1000000L),
-      makeChangeRow(1L, "a", "delete", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(1L, "a", CHANGE_TYPE_DELETE, 2L, 2000000L)))
 
     val expected = Seq(
-      Row("insert", 2L),
-      Row("delete", 1L))
+      Row(CHANGE_TYPE_INSERT, 2L),
+      Row(CHANGE_TYPE_DELETE, 1L))
 
     // DataFrame API
     checkAnswer(
@@ -223,7 +225,7 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("schema includes CDC metadata columns") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     // DataFrame API
     val dfApi = spark.read.option("startingVersion", "1").changes(fullTableName)
@@ -242,14 +244,14 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() version range filters correctly") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 2L, 2000000L),
-      makeChangeRow(3L, "c", "insert", 3L, 3000000L),
-      makeChangeRow(4L, "d", "insert", 4L, 4000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 2L, 2000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 3L, 3000000L),
+      makeChangeRow(4L, "d", CHANGE_TYPE_INSERT, 4L, 4000000L)))
 
     val expected = Seq(
-      Row(2L, "b", "insert", 2L, new Timestamp(2000L)),
-      Row(3L, "c", "insert", 3L, new Timestamp(3000L)))
+      Row(2L, "b", CHANGE_TYPE_INSERT, 2L, new Timestamp(2000L)),
+      Row(3L, "c", CHANGE_TYPE_INSERT, 3L, new Timestamp(3000L)))
 
     // DataFrame API
     checkAnswer(
@@ -269,14 +271,14 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() default bounds are inclusive") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 2L, 2000000L),
-      makeChangeRow(3L, "c", "insert", 3L, 3000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 2L, 2000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 3L, 3000000L)))
 
     val expected = Seq(
-      Row(1L, "a", "insert", 1L, new Timestamp(1000L)),
-      Row(2L, "b", "insert", 2L, new Timestamp(2000L)),
-      Row(3L, "c", "insert", 3L, new Timestamp(3000L)))
+      Row(1L, "a", CHANGE_TYPE_INSERT, 1L, new Timestamp(1000L)),
+      Row(2L, "b", CHANGE_TYPE_INSERT, 2L, new Timestamp(2000L)),
+      Row(3L, "c", CHANGE_TYPE_INSERT, 3L, new Timestamp(3000L)))
 
     // DataFrame API - default (both inclusive)
     checkAnswer(
@@ -300,14 +302,14 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() with startingBoundInclusive=false") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 2L, 2000000L),
-      makeChangeRow(3L, "c", "insert", 3L, 3000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 2L, 2000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 3L, 3000000L)))
 
     // Exclusive start: version 1 excluded, versions 2 and 3 included
     val expected = Seq(
-      Row(2L, "b", "insert", 2L, new Timestamp(2000L)),
-      Row(3L, "c", "insert", 3L, new Timestamp(3000L)))
+      Row(2L, "b", CHANGE_TYPE_INSERT, 2L, new Timestamp(2000L)),
+      Row(3L, "c", CHANGE_TYPE_INSERT, 3L, new Timestamp(3000L)))
 
     // DataFrame API
     checkAnswer(
@@ -327,14 +329,14 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() with endingBoundInclusive=false") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 2L, 2000000L),
-      makeChangeRow(3L, "c", "insert", 3L, 3000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 2L, 2000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 3L, 3000000L)))
 
     // Exclusive end: versions 1 and 2 included, version 3 excluded
     val expected = Seq(
-      Row(1L, "a", "insert", 1L, new Timestamp(1000L)),
-      Row(2L, "b", "insert", 2L, new Timestamp(2000L)))
+      Row(1L, "a", CHANGE_TYPE_INSERT, 1L, new Timestamp(1000L)),
+      Row(2L, "b", CHANGE_TYPE_INSERT, 2L, new Timestamp(2000L)))
 
     // DataFrame API
     checkAnswer(
@@ -354,13 +356,13 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() with both bounds exclusive") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 2L, 2000000L),
-      makeChangeRow(3L, "c", "insert", 3L, 3000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 2L, 2000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 3L, 3000000L)))
 
     // Both exclusive: only version 2 included
     val expected = Seq(
-      Row(2L, "b", "insert", 2L, new Timestamp(2000L)))
+      Row(2L, "b", CHANGE_TYPE_INSERT, 2L, new Timestamp(2000L)))
 
     // DataFrame API
     checkAnswer(
@@ -383,24 +385,24 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("changes() default deduplication mode is dropCarryovers") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     // DataFrame API
     spark.read.option("startingVersion", "1").changes(fullTableName).collect()
-    val info1 = catalog.lastChangelogInfo.get
-    assert(info1.deduplicationMode() === ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    val info1 = catalog.lastChangelogContext.get
+    assert(info1.deduplicationMode() === ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
     assert(info1.computeUpdates() === false)
 
     // SQL (no WITH clause = defaults)
     sql(s"SELECT * FROM $fullTableName CHANGES FROM VERSION 1").collect()
-    val info2 = catalog.lastChangelogInfo.get
-    assert(info2.deduplicationMode() === ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS)
+    val info2 = catalog.lastChangelogContext.get
+    assert(info2.deduplicationMode() === ChangelogContext.DeduplicationMode.DROP_CARRYOVERS)
     assert(info2.computeUpdates() === false)
   }
 
   test("changes() with deduplicationMode none") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     // DataFrame API
     spark.read
@@ -408,44 +410,39 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
       .option("deduplicationMode", "none")
       .changes(fullTableName)
       .collect()
-    assert(catalog.lastChangelogInfo.get.deduplicationMode() ===
-      ChangelogInfo.DeduplicationMode.NONE)
+    assert(catalog.lastChangelogContext.get.deduplicationMode() ===
+      ChangelogContext.DeduplicationMode.NONE)
 
     // SQL
     sql(s"SELECT * FROM $fullTableName CHANGES FROM VERSION 1 " +
       "WITH (deduplicationMode = 'none')").collect()
-    assert(catalog.lastChangelogInfo.get.deduplicationMode() ===
-      ChangelogInfo.DeduplicationMode.NONE)
+    assert(catalog.lastChangelogContext.get.deduplicationMode() ===
+      ChangelogContext.DeduplicationMode.NONE)
   }
 
-  test("changes() passes deduplicationMode and computeUpdates to catalog") {
+  test("changes() passes computeUpdates to catalog") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     // DataFrame API
     spark.read
       .option("startingVersion", "1")
-      .option("deduplicationMode", "netChanges")
       .option("computeUpdates", "true")
       .changes(fullTableName)
       .collect()
-    val info1 = catalog.lastChangelogInfo.get
-    assert(info1.deduplicationMode() === ChangelogInfo.DeduplicationMode.NET_CHANGES)
-    assert(info1.computeUpdates() === true)
+    assert(catalog.lastChangelogContext.get.computeUpdates() === true)
 
     // SQL
     sql(s"SELECT * FROM $fullTableName CHANGES FROM VERSION 1 " +
-      "WITH (deduplicationMode = 'netChanges', computeUpdates = 'true')").collect()
-    val info2 = catalog.lastChangelogInfo.get
-    assert(info2.deduplicationMode() === ChangelogInfo.DeduplicationMode.NET_CHANGES)
-    assert(info2.computeUpdates() === true)
+      "WITH (computeUpdates = 'true')").collect()
+    assert(catalog.lastChangelogContext.get.computeUpdates() === true)
   }
 
   // ---------- Batch: timestamp range ----------
 
   test("changes() with timestamp range") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     // DataFrame API
     spark.read
@@ -453,14 +450,14 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
       .option("endingTimestamp", "2024-12-31 23:59:59")
       .changes(fullTableName)
       .collect()
-    assert(catalog.lastChangelogInfo.get.range()
+    assert(catalog.lastChangelogContext.get.range()
       .isInstanceOf[ChangelogRange.TimestampRange])
 
     // SQL
     sql(s"SELECT * FROM $fullTableName " +
       "CHANGES FROM TIMESTAMP '2024-01-01 00:00:00' " +
       "TO TIMESTAMP '2024-12-31 23:59:59'").collect()
-    assert(catalog.lastChangelogInfo.get.range()
+    assert(catalog.lastChangelogContext.get.range()
       .isInstanceOf[ChangelogRange.TimestampRange])
   }
 
@@ -480,14 +477,14 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("streaming changes() returns change data") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 1L, 1000000L),
-      makeChangeRow(1L, "a", "delete", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(1L, "a", CHANGE_TYPE_DELETE, 2L, 2000000L)))
 
     val expected = Seq(
-      Row(1L, "a", "insert", 1L, new Timestamp(1000L)),
-      Row(2L, "b", "insert", 1L, new Timestamp(1000L)),
-      Row(1L, "a", "delete", 2L, new Timestamp(2000L)))
+      Row(1L, "a", CHANGE_TYPE_INSERT, 1L, new Timestamp(1000L)),
+      Row(2L, "b", CHANGE_TYPE_INSERT, 1L, new Timestamp(1000L)),
+      Row(1L, "a", CHANGE_TYPE_DELETE, 2L, new Timestamp(2000L)))
 
     // DataFrame API
     val dfApiStream = spark.readStream
@@ -517,12 +514,12 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("streaming changes() with startingVersion filters data") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 1L, 1000000L),
-      makeChangeRow(1L, "a", "delete", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(1L, "a", CHANGE_TYPE_DELETE, 2L, 2000000L)))
 
     val expected = Seq(
-      Row(1L, "a", "delete", 2L, new Timestamp(2000L)))
+      Row(1L, "a", CHANGE_TYPE_DELETE, 2L, new Timestamp(2000L)))
 
     // DataFrame API
     val dfApiStream = spark.readStream
@@ -552,9 +549,9 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("streaming changes() with projection and filter") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L),
-      makeChangeRow(2L, "b", "insert", 1L, 1000000L),
-      makeChangeRow(3L, "c", "insert", 2L, 2000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(2L, "b", CHANGE_TYPE_INSERT, 1L, 1000000L),
+      makeChangeRow(3L, "c", CHANGE_TYPE_INSERT, 2L, 2000000L)))
 
     val expected = Seq(Row(1L, "a"), Row(2L, "b"))
 
@@ -589,23 +586,20 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   // ---------- Streaming: CDC options ----------
 
-  test("streaming changes() passes deduplicationMode and computeUpdates to catalog") {
+  test("streaming changes() passes computeUpdates to catalog") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     // DataFrame API
     val dfApiStream = spark.readStream
       .option("startingVersion", "1")
-      .option("deduplicationMode", "netChanges")
       .option("computeUpdates", "true")
       .changes(fullTableName)
     val q1 = dfApiStream.writeStream
       .format("memory").queryName("cdc_stream_opts_df").start()
     try {
       q1.processAllAvailable()
-      val info1 = catalog.lastChangelogInfo.get
-      assert(info1.deduplicationMode() === ChangelogInfo.DeduplicationMode.NET_CHANGES)
-      assert(info1.computeUpdates() === true)
+      assert(catalog.lastChangelogContext.get.computeUpdates() === true)
     } finally {
       q1.stop()
     }
@@ -613,14 +607,12 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
     // SQL
     val sqlStream = sql(
       s"SELECT * FROM STREAM $fullTableName CHANGES FROM VERSION 1 " +
-        "WITH (deduplicationMode = 'netChanges', computeUpdates = 'true')")
+        "WITH (computeUpdates = 'true')")
     val q2 = sqlStream.writeStream
       .format("memory").queryName("cdc_stream_opts_sql").start()
     try {
       q2.processAllAvailable()
-      val info2 = catalog.lastChangelogInfo.get
-      assert(info2.deduplicationMode() === ChangelogInfo.DeduplicationMode.NET_CHANGES)
-      assert(info2.computeUpdates() === true)
+      assert(catalog.lastChangelogContext.get.computeUpdates() === true)
     } finally {
       q2.stop()
     }
@@ -630,10 +622,10 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
 
   test("streaming changes() supports .name() API with source evolution enabled") {
     catalog.addChangeRows(ident, Seq(
-      makeChangeRow(1L, "a", "insert", 1L, 1000000L)))
+      makeChangeRow(1L, "a", CHANGE_TYPE_INSERT, 1L, 1000000L)))
 
     val expected = Seq(
-      Row(1L, "a", "insert", 1L, new Timestamp(1000L)))
+      Row(1L, "a", CHANGE_TYPE_INSERT, 1L, new Timestamp(1000L)))
 
     withSQLConf(SQLConf.ENABLE_STREAMING_SOURCE_EVOLUTION.key -> "true") {
       val stream = spark.readStream
@@ -671,4 +663,855 @@ class ChangelogEndToEndSuite extends SharedSparkSession {
     }
     assert(e.getMessage.contains("changes"))
   }
+
+  // ---------- Streaming: row-level post-processing ----------
+  //
+  // Streaming row-level passes (carry-over removal, update detection) rewrite the plan
+  // into Aggregate(rowId, _commit_version, _commit_timestamp) -> [Filter] ->
+  // Generate(Inline(events)) -> [relabel Project], under an EventTimeWatermark on
+  // _commit_timestamp.
+
+  /** Schema variant for post-processing tests: includes `row_commit_version`. */
+  private def recreateWithRowVersion(): Identifier = {
+    val id = ident
+    val cat = catalog
+    if (cat.tableExists(id)) cat.dropTable(id)
+    cat.createTable(
+      id,
+      Array(
+        Column.create("id", LongType, false),
+        Column.create("data", StringType),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty,
+      new util.HashMap[String, String]())
+    cat.clearChangeRows(id)
+    id
+  }
+
+  /** Row constructor for the row-version-enabled schema. */
+  private def ppRow(
+      id: Long,
+      data: String,
+      rcv: Long,
+      changeType: String,
+      commitVersion: Long,
+      commitTimestampMicros: Long): InternalRow = {
+    InternalRow(
+      id,
+      UTF8String.fromString(data),
+      rcv,
+      UTF8String.fromString(changeType),
+      commitVersion,
+      commitTimestampMicros)
+  }
+
+  test("streaming carry-over removal drops CoW pairs") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert Alice (rcv=1), Bob (rcv=1)
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: real delete Alice + carry-over for Bob (rcv unchanged)
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_carryover")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      // The next micro-batch advances the input watermark to the max _commit_timestamp
+      // seen in the previous batch; append-mode aggregate eviction (eventTime <= watermark)
+      // then emits all groups including the highest commit. v1 inserts + Alice's real
+      // delete survive; Bob's carry-over pair at v2 is dropped.
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_carryover"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(2L, "Bob", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Alice", CHANGE_TYPE_DELETE, 2L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming update detection relabels delete+insert as update") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert Alice (rcv=1)
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: real update Alice -> Robert (delete old, insert new)
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(1L, "Robert", 2L, CHANGE_TYPE_INSERT, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .option("computeUpdates", "true")
+      .option("deduplicationMode", "none")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_updates")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_updates"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Alice", CHANGE_TYPE_UPDATE_PREIMAGE, 2L),
+          Row(1L, "Robert", CHANGE_TYPE_UPDATE_POSTIMAGE, 2L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  // TransformWithState requires the RocksDB state store backend.
+  private val rocksDbProviderConf = SQLConf.STATE_STORE_PROVIDER_CLASS.key ->
+    "org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider"
+
+  test("streaming netChanges collapses INSERT then DELETE to no output") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert Alice (rcv=1)
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: delete Alice -- net cancels out
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      // v3: insert Bob -- emits at end-of-input flush
+      ppRow(2L, "Bob", 3L, CHANGE_TYPE_INSERT, 3L, 3000000L)))
+
+    withSQLConf(rocksDbProviderConf) {
+      val q = spark.readStream
+        .option("startingVersion", "1")
+        .option("deduplicationMode", "netChanges")
+        .changes(fullTableName)
+        .select("id", "data", "_change_type", "_commit_version")
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_netchanges_cancel")
+        .outputMode("append")
+        .start()
+      try {
+        q.processAllAvailable()
+        // End-of-input flushes all timers so Bob's insert emits.
+        // Alice's INSERT then DELETE cancels out (no row), and the final "Bob" stays.
+        checkAnswer(
+          spark.sql("SELECT * FROM cdc_stream_netchanges_cancel"),
+          Seq(Row(2L, "Bob", CHANGE_TYPE_INSERT, 3L)))
+      } finally {
+        q.stop()
+      }
+    }
+  }
+
+  test("streaming netChanges with computeUpdates labels persisting rows as updates") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsIntermediateChanges = true,
+      representsUpdateAsDeleteAndInsert = false, // updates already materialized
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // Row identity 1 already exists before the stream window, so the first event we
+    // observe is its update_preimage -> existedBefore = true. The last event is the
+    // update_postimage in v2 -> existsAfter = true. With computeUpdates = true the
+    // (true, true) cell of the SPIP matrix emits a relabeled
+    // update_preimage + update_postimage pair (rather than delete + insert).
+    catalog.addChangeRows(id, Seq(
+      // v1: pre-existing Alice updated to Bob
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_UPDATE_PREIMAGE, 1L, 1000000L),
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_UPDATE_POSTIMAGE, 1L, 1000000L),
+      // v2: Bob updated to Robert -- the v1 preimage and the v2 postimage are the
+      // first and last events for row identity 1 across the entire range.
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_UPDATE_PREIMAGE, 2L, 2000000L),
+      ppRow(1L, "Robert", 2L, CHANGE_TYPE_UPDATE_POSTIMAGE, 2L, 2000000L)))
+
+    withSQLConf(rocksDbProviderConf) {
+      val q = spark.readStream
+        .option("startingVersion", "1")
+        .option("deduplicationMode", "netChanges")
+        .option("computeUpdates", "true")
+        .changes(fullTableName)
+        .select("id", "data", "_change_type")
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_netchanges_update")
+        .outputMode("append")
+        .start()
+      try {
+        q.processAllAvailable()
+        checkAnswer(
+          spark.sql("SELECT * FROM cdc_stream_netchanges_update"),
+          Seq(
+            Row(1L, "Alice", CHANGE_TYPE_UPDATE_PREIMAGE),
+            Row(1L, "Robert", CHANGE_TYPE_UPDATE_POSTIMAGE)))
+      } finally {
+        q.stop()
+      }
+    }
+  }
+
+  // The streaming row-level rewrite injects a streaming Aggregate, which is only
+  // semantically valid with Append output mode (Update / Complete would re-emit
+  // per-batch updates or the entire result table per batch, neither of which matches
+  // batch CDC semantics). UnsupportedOperationChecker now rejects those modes.
+
+  test("streaming row-level post-processing with update output mode is rejected") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L)))
+
+    val e = intercept[AnalysisException] {
+      spark.readStream
+        .option("startingVersion", "1")
+        .changes(fullTableName)
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_update_rejected")
+        .outputMode("update")
+        .start()
+    }
+    assert(e.getCondition == "STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION",
+      s"Unexpected error: ${e.getMessage}")
+    assert(e.getMessage.contains("Change Data Capture"),
+      s"Error should mention CDC: ${e.getMessage}")
+  }
+
+  test("streaming row-level post-processing with complete output mode is rejected") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L)))
+
+    val e = intercept[AnalysisException] {
+      spark.readStream
+        .option("startingVersion", "1")
+        .changes(fullTableName)
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_complete_rejected")
+        .outputMode("complete")
+        .start()
+    }
+    assert(e.getCondition == "STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION",
+      s"Unexpected error: ${e.getMessage}")
+    assert(e.getMessage.contains("Change Data Capture"),
+      s"Error should mention CDC: ${e.getMessage}")
+  }
+
+  // The streaming netChanges-only path injects a TransformWithState whose internal
+  // outputMode is Append. Without an explicit per-operator check the analyzer would
+  // happily accept the user requesting Update output mode at the writer, even though
+  // the rewrite is only valid for Append (Update would re-emit per-batch state changes
+  // that don't match batch netChanges semantics). UnsupportedOperationChecker therefore
+  // detects the netChanges processor and rejects non-Append modes with a clear error.
+  // (Complete is also rejected by the generic "Complete requires aggregations" check
+  // since the netChanges-only path has no streaming Aggregate, but we assert it here
+  // too for symmetry with the row-level rejection tests above.)
+
+  test("streaming netChanges with update output mode is rejected") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L)))
+
+    val e = intercept[AnalysisException] {
+      withSQLConf(rocksDbProviderConf) {
+        spark.readStream
+          .option("startingVersion", "1")
+          .option("deduplicationMode", "netChanges")
+          .changes(fullTableName)
+          .writeStream
+          .format("memory")
+          .queryName("cdc_stream_netchanges_update_rejected")
+          .outputMode("update")
+          .start()
+      }
+    }
+    assert(e.getCondition == "STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION",
+      s"Unexpected error: ${e.getMessage}")
+    assert(e.getMessage.contains("Change Data Capture"),
+      s"Error should mention CDC: ${e.getMessage}")
+  }
+
+  test("streaming netChanges with complete output mode is rejected") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L)))
+
+    val e = intercept[AnalysisException] {
+      withSQLConf(rocksDbProviderConf) {
+        spark.readStream
+          .option("startingVersion", "1")
+          .option("deduplicationMode", "netChanges")
+          .changes(fullTableName)
+          .writeStream
+          .format("memory")
+          .queryName("cdc_stream_netchanges_complete_rejected")
+          .outputMode("complete")
+          .start()
+      }
+    }
+    assert(e.getCondition == "STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION",
+      s"Unexpected error: ${e.getMessage}")
+  }
+
+  test("streaming row-level rewrite raises on NULL _commit_timestamp") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // Insert a row with NULL _commit_timestamp (last column).
+    val row = InternalRow(
+      1L, UTF8String.fromString("Alice"), 1L,
+      UTF8String.fromString(CHANGE_TYPE_INSERT), 1L, null)
+    catalog.addChangeRows(id, Seq(row))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .changes(fullTableName)
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_null_ts")
+      .outputMode("append")
+      .start()
+    try {
+      val e = intercept[org.apache.spark.sql.streaming.StreamingQueryException] {
+        q.processAllAvailable()
+      }
+      // The CHANGELOG_CONTRACT_VIOLATION runtime error wraps the message; it should
+      // mention NULL_COMMIT_TIMESTAMP somewhere in the chain.
+      assert(e.getMessage.contains("NULL_COMMIT_TIMESTAMP") ||
+        Option(e.getCause).map(_.getMessage).getOrElse("").contains("NULL_COMMIT_TIMESTAMP"),
+        s"Expected NULL_COMMIT_TIMESTAMP in the error chain. Got: ${e.getMessage}")
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming netChanges emits DELETE for pre-existing row deleted in range") {
+    // Exercises the SPIP `(true, false)` cell: existedBefore = true (first event is a
+    // delete or update_preimage), existsAfter = false (last event is a delete).
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: pre-existing Alice gets updated to Bob.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_UPDATE_PREIMAGE, 1L, 1000000L),
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_UPDATE_POSTIMAGE, 1L, 1000000L),
+      // v2: Bob deleted -- the v1 preimage is the first event and the v2 delete is
+      // the last event for row identity 1 across the entire range.
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      // v3: insert Carol -- gives the watermark something to advance past, so row
+      // identity 1's timer fires before end-of-input.
+      ppRow(2L, "Carol", 3L, CHANGE_TYPE_INSERT, 3L, 3000000L)))
+
+    withSQLConf(rocksDbProviderConf) {
+      val q = spark.readStream
+        .option("startingVersion", "1")
+        .option("deduplicationMode", "netChanges")
+        .changes(fullTableName)
+        .select("id", "data", "_change_type")
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_netchanges_delete")
+        .outputMode("append")
+        .start()
+      try {
+        q.processAllAvailable()
+        checkAnswer(
+          spark.sql("SELECT * FROM cdc_stream_netchanges_delete"),
+          Seq(
+            // (true, false): emit a single DELETE carrying the *first* event's data
+            // (the preimage), per the batch contract.
+            Row(1L, "Alice", CHANGE_TYPE_DELETE),
+            Row(2L, "Carol", CHANGE_TYPE_INSERT)))
+      } finally {
+        q.stop()
+      }
+    }
+  }
+
+  test("streaming netChanges without computeUpdates keeps persisting rows as DELETE+INSERT") {
+    // Exercises the SPIP `(true, true)` cell with computeUpdates = false: the pair is
+    // emitted as DELETE + INSERT rather than relabeled as
+    // update_preimage + update_postimage.
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: pre-existing Alice updated to Bob.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_UPDATE_PREIMAGE, 1L, 1000000L),
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_UPDATE_POSTIMAGE, 1L, 1000000L),
+      // v2: Bob updated to Robert -- row identity 1 spans (preimage Alice) ..
+      // (postimage Robert).
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_UPDATE_PREIMAGE, 2L, 2000000L),
+      ppRow(1L, "Robert", 2L, CHANGE_TYPE_UPDATE_POSTIMAGE, 2L, 2000000L)))
+
+    withSQLConf(rocksDbProviderConf) {
+      val q = spark.readStream
+        .option("startingVersion", "1")
+        .option("deduplicationMode", "netChanges")
+        // computeUpdates defaults to false.
+        .changes(fullTableName)
+        .select("id", "data", "_change_type")
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_netchanges_no_compute_updates")
+        .outputMode("append")
+        .start()
+      try {
+        q.processAllAvailable()
+        checkAnswer(
+          spark.sql("SELECT * FROM cdc_stream_netchanges_no_compute_updates"),
+          Seq(
+            Row(1L, "Alice", CHANGE_TYPE_DELETE),
+            Row(1L, "Robert", CHANGE_TYPE_INSERT)))
+      } finally {
+        q.stop()
+      }
+    }
+  }
+
+  test("streaming netChanges + carry-over removal: combined post-processing") {
+    // Validates the design point that the row-level rewrite and the netChanges rewrite
+    // share a single EventTimeWatermark on `_commit_timestamp` and produce the
+    // expected combined result. Carry-over CoW pairs are dropped before the netChanges
+    // collapse runs, so the final emission only reflects real content changes.
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert Alice, Bob (rcv=1).
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: real delete Alice + carry-over for Bob (rcv unchanged means CoW rewrite,
+      // no content change).
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 2L, 2000000L),
+      // v3: insert Carol -- advances the watermark past v2 so timers for row
+      // identities 1 and 2 fire and the netChanges output is emitted.
+      ppRow(3L, "Carol", 3L, CHANGE_TYPE_INSERT, 3L, 3000000L)))
+
+    withSQLConf(rocksDbProviderConf) {
+      val q = spark.readStream
+        .option("startingVersion", "1")
+        .option("deduplicationMode", "netChanges")
+        .changes(fullTableName)
+        .select("id", "data", "_change_type")
+        .writeStream
+        .format("memory")
+        .queryName("cdc_stream_netchanges_with_carryover")
+        .outputMode("append")
+        .start()
+      try {
+        q.processAllAvailable()
+        // After carry-over removal: Alice has v1 INSERT + v2 DELETE; Bob has only
+        // v1 INSERT (the v2 CoW pair was dropped); Carol has v3 INSERT.
+        // After netChanges:
+        //   Alice -- (false, false) -> no output
+        //   Bob   -- (false, true)  -> emit INSERT
+        //   Carol -- (false, true)  -> emit INSERT
+        checkAnswer(
+          spark.sql("SELECT * FROM cdc_stream_netchanges_with_carryover"),
+          Seq(
+            Row(2L, "Bob", CHANGE_TYPE_INSERT),
+            Row(3L, "Carol", CHANGE_TYPE_INSERT)))
+      } finally {
+        q.stop()
+      }
+    }
+  }
+
+  // ---------- Streaming: extended row-level post-processing coverage ----------
+
+  test("streaming carry-over removal with composite rowId removes pairs per (id, name)") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id", "data"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert two rows that share id=1 but different `data`. The composite rowId
+      // (id, data) means each is its own row identity.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: CoW carry-over for (1, "Alice"); real delete for (1, "Bob").
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 2L, 2000000L),
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_composite_carryover")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      // (1, "Alice") carry-over is dropped; (1, "Bob") delete survives. With broken
+      // single-column rowId partitioning the four v2 rows would collapse into one
+      // partition with del_cnt=2 / ins_cnt=1 and no rows would qualify as carry-over.
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_composite_carryover"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Bob", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Bob", CHANGE_TYPE_DELETE, 2L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming update detection with composite rowId keeps different tuples raw") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id", "data"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert pre-existing Alice and Bob (so v2 has rows to fall through).
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(1L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: delete (1, "Alice") and insert (1, "Bob"). These are DIFFERENT composite
+      // rowIds; they MUST NOT be relabeled as update.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(1L, "Carol", 2L, CHANGE_TYPE_INSERT, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .option("computeUpdates", "true")
+      .option("deduplicationMode", "none")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_composite_updates")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_composite_updates"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT),
+          Row(1L, "Bob", CHANGE_TYPE_INSERT),
+          Row(1L, "Alice", CHANGE_TYPE_DELETE),
+          Row(1L, "Carol", CHANGE_TYPE_INSERT)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming carry-over removal and update detection across multiple commits") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      // v1: insert 3 rows.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(3L, "Charlie", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: real delete Alice; CoW carry-overs for Bob/Charlie.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 2L, 2000000L),
+      ppRow(3L, "Charlie", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(3L, "Charlie", 1L, CHANGE_TYPE_INSERT, 2L, 2000000L),
+      // v3: real update Bob -> Robert (rcv bumps); CoW for Charlie.
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_DELETE, 3L, 3000000L),
+      ppRow(2L, "Robert", 3L, CHANGE_TYPE_INSERT, 3L, 3000000L),
+      ppRow(3L, "Charlie", 1L, CHANGE_TYPE_DELETE, 3L, 3000000L),
+      ppRow(3L, "Charlie", 1L, CHANGE_TYPE_INSERT, 3L, 3000000L),
+      // v4: insert Diana.
+      ppRow(4L, "Diana", 4L, CHANGE_TYPE_INSERT, 4L, 4000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .option("computeUpdates", "true")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_multi_commit")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      val result = spark.sql(
+        "SELECT * FROM cdc_stream_multi_commit ORDER BY _commit_version, id, _change_type")
+      checkAnswer(
+        result,
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(2L, "Bob", CHANGE_TYPE_INSERT, 1L),
+          Row(3L, "Charlie", CHANGE_TYPE_INSERT, 1L),
+          // v2: only Alice's real delete survives (Bob and Charlie carry-over dropped).
+          Row(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+          // v3: Bob -> Robert relabeled as update_pre/postimage; Charlie carry-over
+          // dropped. The ORDER BY breaks ties on `_change_type` ascending where
+          // `update_postimage` < `update_preimage` alphabetically.
+          Row(2L, "Robert", CHANGE_TYPE_UPDATE_POSTIMAGE, 3L),
+          Row(2L, "Bob", CHANGE_TYPE_UPDATE_PREIMAGE, 3L),
+          // v4
+          Row(4L, "Diana", CHANGE_TYPE_INSERT, 4L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming DELETE-all-rows: only deletes survive at the deleting commit") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_delete_all")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_delete_all"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(2L, "Bob", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+          Row(2L, "Bob", CHANGE_TYPE_DELETE, 2L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming UPDATE-all-rows: every row gets update_pre/postimage") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2: every row is a real update (different rcv on pre vs post).
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(1L, "AliceUpdated", 2L, CHANGE_TYPE_INSERT, 2L, 2000000L),
+      ppRow(2L, "Bob", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(2L, "BobUpdated", 2L, CHANGE_TYPE_INSERT, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .option("computeUpdates", "true")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_update_all")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_update_all"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(2L, "Bob", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Alice", CHANGE_TYPE_UPDATE_PREIMAGE, 2L),
+          Row(1L, "AliceUpdated", CHANGE_TYPE_UPDATE_POSTIMAGE, 2L),
+          Row(2L, "Bob", CHANGE_TYPE_UPDATE_PREIMAGE, 2L),
+          Row(2L, "BobUpdated", CHANGE_TYPE_UPDATE_POSTIMAGE, 2L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming append-only workload: all inserts pass through") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      // Append-only connectors typically declare no special CDC capabilities.
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      ppRow(2L, "Bob", 2L, CHANGE_TYPE_INSERT, 2L, 2000000L),
+      ppRow(3L, "Charlie", 3L, CHANGE_TYPE_INSERT, 3L, 3000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_append_only")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_append_only"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(2L, "Bob", CHANGE_TYPE_INSERT, 2L),
+          Row(3L, "Charlie", CHANGE_TYPE_INSERT, 3L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming no-op UPDATE is labeled as update (rcv differs on pre/post)") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(id, Seq(
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_INSERT, 1L, 1000000L),
+      // v2 no-op update: identical data, but rcv differs (Delta bumps it on every UPDATE).
+      // Carry-over filter requires _min_rv = _max_rv, which is false here, so this is
+      // correctly treated as a real update -- not a carry-over.
+      ppRow(1L, "Alice", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+      ppRow(1L, "Alice", 2L, CHANGE_TYPE_INSERT, 2L, 2000000L)))
+
+    val q = spark.readStream
+      .option("startingVersion", "1")
+      .option("computeUpdates", "true")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type", "_commit_version")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_noop_update")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_noop_update"),
+        Seq(
+          Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+          Row(1L, "Alice", CHANGE_TYPE_UPDATE_PREIMAGE, 2L),
+          Row(1L, "Alice", CHANGE_TYPE_UPDATE_POSTIMAGE, 2L)))
+    } finally {
+      q.stop()
+    }
+  }
+
+  test("streaming carry-over removal at scale: many CoW pairs, one real change") {
+    val id = recreateWithRowVersion()
+    catalog.setChangelogProperties(id, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val v1Inserts = (1 to 10).map { i =>
+      ppRow(i.toLong, ('A' + i - 1).toChar.toString, 1L,
+        CHANGE_TYPE_INSERT, 1L, 1000000L)
+    }
+    // At v2: row 5 is really deleted; the other 9 rows are CoW carry-over pairs
+    // (rcv unchanged on both sides means content unchanged).
+    val v2CarryOvers = (1 to 10).filter(_ != 5).flatMap { i =>
+      val name = ('A' + i - 1).toChar.toString
+      Seq(
+        ppRow(i.toLong, name, 1L, CHANGE_TYPE_DELETE, 2L, 2000000L),
+        ppRow(i.toLong, name, 1L, CHANGE_TYPE_INSERT, 2L, 2000000L))
+    }
+    val v2RealDelete = Seq(ppRow(5L, "E", 1L, CHANGE_TYPE_DELETE, 2L, 2000000L))
+    catalog.addChangeRows(id, v1Inserts ++ v2CarryOvers ++ v2RealDelete)
+
+    val q = spark.readStream
+      .option("startingVersion", "2")
+      .option("endingVersion", "2")
+      .changes(fullTableName)
+      .select("id", "data", "_change_type")
+      .writeStream
+      .format("memory")
+      .queryName("cdc_stream_many_carryovers")
+      .outputMode("append")
+      .start()
+    try {
+      q.processAllAvailable()
+      // 9 carry-over pairs dropped; only the real delete of row 5 survives.
+      checkAnswer(
+        spark.sql("SELECT * FROM cdc_stream_many_carryovers"),
+        Seq(Row(5L, "E", CHANGE_TYPE_DELETE)))
+    } finally {
+      q.stop()
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala
index db6817b0c212c..082be2ac22c86 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala
@@ -17,16 +17,20 @@
 
 package org.apache.spark.sql.connector
 
-import java.util
+import java.util.Collections
 
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
 import org.apache.spark.sql.connector.catalog._
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 import org.apache.spark.sql.connector.catalog.ChangelogRange
+import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference, Transform}
+import org.apache.spark.sql.connector.read.ScanBuilder
 import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation}
 import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{LongType, StringType}
+import org.apache.spark.sql.types.{ArrayType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType}
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 /**
  * Tests for the CDC (Change Data Capture) analyzer resolution path:
@@ -63,8 +67,8 @@ class ChangelogResolutionSuite extends SharedSparkSession {
       Array(
         Column.create("id", LongType),
         Column.create("data", StringType)),
-      Array.empty,
-      new util.HashMap[String, String]())
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
 
     val noCdcCat = spark.sessionState.catalogManager.catalog(noCdcCatalogName).asTableCatalog
     val ident2 = Identifier.of(Array.empty, "test_table")
@@ -76,8 +80,8 @@ class ChangelogResolutionSuite extends SharedSparkSession {
       Array(
         Column.create("id", LongType),
         Column.create("data", StringType)),
-      Array.empty,
-      new util.HashMap[String, String]())
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
   }
 
   test("CHANGES clause resolves to DataSourceV2Relation with ChangelogTable") {
@@ -192,15 +196,386 @@ class ChangelogResolutionSuite extends SharedSparkSession {
       parameters = Map("relationId" -> "`x`"))
   }
 
-  test("CHANGES clause passes changelogInfo to catalog") {
+  test("CHANGES clause passes changelogContext to catalog") {
     sql(s"SELECT * FROM $cdcCatalogName.test_table CHANGES FROM VERSION 1 TO VERSION 5")
     val cat = spark.sessionState.catalogManager
       .catalog(cdcCatalogName)
       .asInstanceOf[InMemoryChangelogCatalog]
-    val info = cat.lastChangelogInfo
+    val info = cat.lastChangelogContext
     assert(info.isDefined)
     val range = info.get.range().asInstanceOf[ChangelogRange.VersionRange]
     assert(range.startingVersion() == "1")
     assert(range.endingVersion().get() == "5")
   }
+
+  test("user-defined options are forwarded to loadChangelog") {
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+
+    spark.read
+      .option("startingVersion", "1")
+      .option("customOption", "customValue")
+      .changes(s"$cdcCatalogName.test_table")
+
+    val opts = cat.lastOptions
+    assert(opts.isDefined)
+    assert(opts.get.get("customOption") == "customValue")
+    assert(opts.get.get("startingVersion") == "1")
+  }
+
+  test("user-defined options are forwarded to loadChangelog - SQL WITH clause") {
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+
+    sql(s"SELECT * FROM $cdcCatalogName.test_table CHANGES FROM VERSION 1 " +
+      "WITH ('customOption' = 'customValue')").queryExecution.analyzed
+
+    val opts = cat.lastOptions
+    assert(opts.isDefined)
+    assert(opts.get.get("customOption") == "customValue")
+  }
+
+  test("user-defined options are forwarded to loadChangelog - DataStreamReader") {
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+
+    spark.readStream
+      .option("startingVersion", "1")
+      .option("customOption", "customValue")
+      .changes(s"$cdcCatalogName.test_table")
+      .queryExecution.analyzed
+
+    val opts = cat.lastOptions
+    assert(opts.isDefined)
+    assert(opts.get.get("customOption") == "customValue")
+    assert(opts.get.get("startingVersion") == "1")
+  }
+
+  test("user-defined options are forwarded to loadChangelog - streaming SQL") {
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+
+    sql(s"SELECT * FROM STREAM $cdcCatalogName.test_table CHANGES FROM VERSION 1 " +
+      "WITH ('customOption' = 'customValue')").queryExecution.analyzed
+
+    val opts = cat.lastOptions
+    assert(opts.isDefined)
+    assert(opts.get.get("customOption") == "customValue")
+  }
+
+  // ===========================================================================
+  // Streaming post-processing
+  // ===========================================================================
+  //
+  // Row-level passes (carry-over removal and update detection) rewrite the streaming plan
+  // into Aggregate -> [Filter] -> Generate(Inline) -> [Project] under an
+  // EventTimeWatermark on `_commit_timestamp`. Net-change computation is still rejected
+  // since it requires reasoning over the entire requested range.
+
+  /** Re-creates the test table with non-nullable columns suitable as rowId / rowVersion. */
+  private def recreatePostProcessingTable(): Identifier = {
+    val cat = spark.sessionState.catalogManager.catalog(cdcCatalogName).asTableCatalog
+    val ident = Identifier.of(Array.empty, "test_table")
+    if (cat.tableExists(ident)) cat.dropTable(ident)
+    cat.createTable(
+      ident,
+      Array(
+        Column.create("id", LongType, false),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+    ident
+  }
+
+  private def assertStreamingRowLevelRewrite(plan: LogicalPlan): Unit = {
+    import org.apache.spark.sql.catalyst.plans.logical.{
+      Aggregate, EventTimeWatermark, Generate}
+    val watermarks = plan.collect { case w: EventTimeWatermark => w }
+    assert(watermarks.nonEmpty,
+      s"Expected EventTimeWatermark in streaming row-level rewrite. Plan:\n$plan")
+    assert(watermarks.head.eventTime.name == "_commit_timestamp",
+      s"Watermark must be on `_commit_timestamp`. Plan:\n$plan")
+    val aggs = plan.collect { case a: Aggregate => a }
+    assert(aggs.nonEmpty,
+      s"Expected Aggregate in streaming row-level rewrite. Plan:\n$plan")
+    val gens = plan.collect { case g: Generate => g }
+    assert(gens.nonEmpty,
+      s"Expected Generate(Inline) in streaming row-level rewrite. Plan:\n$plan")
+  }
+
+  test("DataStreamReader - changes() with carry-over capability rewrites plan") {
+    val ident = recreatePostProcessingTable()
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+    cat.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = spark.readStream
+      .changes(s"$cdcCatalogName.test_table")
+      .queryExecution.analyzed
+    assertStreamingRowLevelRewrite(analyzed)
+  }
+
+  test("DataStreamReader - changes() with computeUpdates rewrites plan") {
+    val ident = recreatePostProcessingTable()
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+    cat.setChangelogProperties(ident, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = spark.readStream
+      .option("computeUpdates", "true")
+      .option("deduplicationMode", "none")
+      .changes(s"$cdcCatalogName.test_table")
+      .queryExecution.analyzed
+    assertStreamingRowLevelRewrite(analyzed)
+  }
+
+  test("DataStreamReader - changes() with deduplicationMode=netChanges rewrites plan") {
+    import org.apache.spark.sql.catalyst.plans.logical.TransformWithState
+    val ident = recreatePostProcessingTable()
+    val cat = spark.sessionState.catalogManager
+      .catalog(cdcCatalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+    cat.setChangelogProperties(ident, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = spark.readStream
+      .option("deduplicationMode", "netChanges")
+      .changes(s"$cdcCatalogName.test_table")
+      .queryExecution.analyzed
+    val tws = analyzed.collect { case t: TransformWithState => t }
+    assert(tws.size == 1,
+      s"Expected exactly one TransformWithState; found ${tws.size}. Plan:\n$analyzed")
+  }
+
+  // ===========================================================================
+  // Generic changelog schema validation
+  // ===========================================================================
+
+  private def stubInfo(): ChangelogContext = new ChangelogContext(
+    new ChangelogRange.VersionRange("1", java.util.Optional.of("2"), true, true),
+    ChangelogContext.DeduplicationMode.DROP_CARRYOVERS,
+    false)
+
+  private def cl(name: String, cols: (String, org.apache.spark.sql.types.DataType)*)
+      : TestChangelog = {
+    new TestChangelog(name, cols.map { case (n, t) => Column.create(n, t) }.toArray)
+  }
+
+  private def missing(columnName: String): Map[String, String] =
+    Map("changelogName" -> "bad_cl", "columnName" -> columnName)
+
+  private def wrongType(columnName: String, expected: String, actual: String)
+      : Map[String, String] = Map(
+    "changelogName" -> "bad_cl",
+    "columnName" -> columnName,
+    "expectedType" -> expected,
+    "actualType" -> actual)
+
+  // Valid metadata tuples; tests swap one of these out to create broken schemas.
+  private val validChangeType = "_change_type" -> StringType
+  private val validVersion = "_commit_version" -> LongType
+  private val validTimestamp = "_commit_timestamp" -> TimestampType
+
+  test("ChangelogTable - missing _change_type column throws") {
+    checkError(
+      intercept[AnalysisException] {
+        ChangelogTable(cl("bad_cl", validVersion, validTimestamp), stubInfo())
+      },
+      condition = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN",
+      parameters = missing("_change_type"))
+  }
+
+  test("ChangelogTable - missing _commit_version column throws") {
+    checkError(
+      intercept[AnalysisException] {
+        ChangelogTable(cl("bad_cl", validChangeType, validTimestamp), stubInfo())
+      },
+      condition = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN",
+      parameters = missing("_commit_version"))
+  }
+
+  test("ChangelogTable - missing _commit_timestamp column throws") {
+    checkError(
+      intercept[AnalysisException] {
+        ChangelogTable(cl("bad_cl", validChangeType, validVersion), stubInfo())
+      },
+      condition = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN",
+      parameters = missing("_commit_timestamp"))
+  }
+
+  test("ChangelogTable - wrong _change_type data type throws") {
+    checkError(
+      intercept[AnalysisException] {
+        ChangelogTable(
+          cl("bad_cl", "_change_type" -> IntegerType, validVersion, validTimestamp),
+          stubInfo())
+      },
+      condition = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE",
+      parameters = wrongType("_change_type", "STRING", "INT"))
+  }
+
+  test("ChangelogTable - wrong _commit_timestamp data type throws") {
+    checkError(
+      intercept[AnalysisException] {
+        ChangelogTable(
+          cl("bad_cl", validChangeType, validVersion, "_commit_timestamp" -> LongType),
+          stubInfo())
+      },
+      condition = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE",
+      parameters = wrongType("_commit_timestamp", "TIMESTAMP", "BIGINT"))
+  }
+
+  test("ChangelogTable - _commit_version accepts LongType and StringType") {
+    Seq(LongType, StringType).foreach { versionType =>
+      ChangelogTable(
+        cl("any_cl", validChangeType, "_commit_version" -> versionType, validTimestamp),
+        stubInfo())
+    }
+  }
+
+  test("ChangelogTable - _commit_version rejects all other data types") {
+    val structVersion = StructType(Seq(StructField("v", LongType)))
+    Seq[(org.apache.spark.sql.types.DataType, String)](
+      // Other atomic types previously allowed under the AtomicType contract.
+      IntegerType -> "INT",
+      TimestampType -> "TIMESTAMP",
+      // Complex types (always rejected).
+      ArrayType(LongType) -> "ARRAY<BIGINT>",
+      MapType(StringType, LongType) -> "MAP<STRING, BIGINT>",
+      structVersion -> structVersion.sql).foreach { case (versionType, sql) =>
+      checkError(
+        intercept[AnalysisException] {
+          ChangelogTable(
+            cl("bad_cl", validChangeType, "_commit_version" -> versionType, validTimestamp),
+            stubInfo())
+        },
+        condition = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE",
+        parameters = wrongType("_commit_version", "BIGINT or STRING", sql))
+    }
+  }
+
+  test("ChangelogTable - valid schema with data columns passes") {
+    ChangelogTable(
+      cl("good_cl", "id" -> LongType, "name" -> StringType,
+        validChangeType, validVersion, validTimestamp),
+      stubInfo())
+  }
+
+  test("ChangelogTable - nested rowId and rowVersion references pass (Delta-style _metadata)") {
+    val metadataRowId = FieldReference(Seq("_metadata", "row_id"))
+    val metadataRowVersion = FieldReference(Seq("_metadata", "row_commit_version"))
+    val cl = new TestChangelog(
+      "delta_cl",
+      Array(
+        Column.create("id", LongType, false),
+        Column.create("_change_type", StringType),
+        Column.create("_commit_version", LongType),
+        Column.create("_commit_timestamp", TimestampType)),
+      carryoverRows = true,
+      rowIdRefs = Array(metadataRowId),
+      rowVersionRef = Some(metadataRowVersion))
+    ChangelogTable(cl, stubInfo())
+  }
+
+  test("ChangelogTable - representsUpdateAsDeleteAndInsert=true requires non-empty rowId") {
+    val cl = new TestChangelog(
+      "bad_cl",
+      Array(
+        Column.create("_change_type", StringType),
+        Column.create("_commit_version", LongType),
+        Column.create("_commit_timestamp", TimestampType)),
+      updateAsDeleteInsert = true,
+      rowIdRefs = Array.empty,
+      rowVersionRef = Some(FieldReference.column("_commit_version")))
+    checkError(
+      intercept[AnalysisException] { ChangelogTable(cl, stubInfo()) },
+      condition = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID",
+      parameters = Map("changelogName" -> "bad_cl"))
+  }
+
+  test("ChangelogTable - containsIntermediateChanges=true requires non-empty rowId") {
+    val cl = new TestChangelog(
+      "bad_cl",
+      Array(
+        Column.create("_change_type", StringType),
+        Column.create("_commit_version", LongType),
+        Column.create("_commit_timestamp", TimestampType)),
+      intermediateChanges = true,
+      rowIdRefs = Array.empty)
+    checkError(
+      intercept[AnalysisException] { ChangelogTable(cl, stubInfo()) },
+      condition = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID",
+      parameters = Map("changelogName" -> "bad_cl"))
+  }
+
+  test("ChangelogTable - UnsupportedOperationException surfaces when rowId() not implemented") {
+    val cl = new TestChangelog(
+      "bad_cl",
+      Array(
+        Column.create("_change_type", StringType),
+        Column.create("_commit_version", LongType),
+        Column.create("_commit_timestamp", TimestampType)),
+      carryoverRows = true,
+      rowIdSupported = false,
+      rowVersionRef = Some(FieldReference.column("_commit_version")))
+    intercept[UnsupportedOperationException] { ChangelogTable(cl, stubInfo()) }
+  }
+
+  test("ChangelogTable - UnsupportedOperationException surfaces when rowVersion() missing") {
+    val cl = new TestChangelog(
+      "bad_cl",
+      Array(
+        Column.create("_change_type", StringType),
+        Column.create("_commit_version", LongType),
+        Column.create("_commit_timestamp", TimestampType)),
+      carryoverRows = true,
+      rowIdRefs = Array(FieldReference.column("id")),
+      rowVersionRef = None)
+    intercept[UnsupportedOperationException] { ChangelogTable(cl, stubInfo()) }
+  }
+
+}
+
+/**
+ * Test-only [[Changelog]] implementation that returns a hand-crafted schema. Used to
+ * exercise [[ChangelogTable]]'s schema validation without going through a real catalog.
+ *
+ * Defaults match a minimal connector with no post-processing capabilities. Tests opt
+ * into capability flags or `rowVersion()` overrides via constructor params.
+ */
+private class TestChangelog(
+    nameArg: String,
+    cols: Array[Column],
+    carryoverRows: Boolean = false,
+    updateAsDeleteInsert: Boolean = false,
+    intermediateChanges: Boolean = false,
+    rowIdRefs: Array[NamedReference] = Array.empty,
+    rowIdSupported: Boolean = true,
+    rowVersionRef: Option[NamedReference] = None) extends Changelog {
+  override def name(): String = nameArg
+  override def columns(): Array[Column] = cols
+  override def containsCarryoverRows(): Boolean = carryoverRows
+  override def containsIntermediateChanges(): Boolean = intermediateChanges
+  override def representsUpdateAsDeleteAndInsert(): Boolean = updateAsDeleteInsert
+  override def rowId(): Array[NamedReference] =
+    if (rowIdSupported) rowIdRefs else super.rowId()
+  override def rowVersion(): NamedReference =
+    rowVersionRef.getOrElse(super.rowVersion())
+  override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
+    throw new UnsupportedOperationException("not needed for schema validation tests")
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2CacheTableReadTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2CacheTableReadTests.scala
new file mode 100644
index 0000000000000..ac6ffcc6ecc0d
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2CacheTableReadTests.scala
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, InMemoryTableCatalog, TableChange, TableInfo}
+import org.apache.spark.sql.types.IntegerType
+
+/**
+ * Shared CACHE TABLE impact on reads tests for DSv2 tables. Write operations ignore the
+ * cache, so these tests verify how reads behave when a cached table is mutated by session
+ * SQL or external catalog API calls:
+ *
+ *  - Scenario 1 (external write): cache pins the read, external write invisible until REFRESH.
+ *  - Scenario 2 (session write then external write): session write rebuilds cache,
+ *    subsequent external write invisible until REFRESH.
+ *  - Scenario 3 (external schema change): cache pinned at original schema until REFRESH.
+ *  - Scenario 4 (session schema change then external write): session ALTER rebuilds
+ *    cache, subsequent external write invisible until REFRESH.
+ *  - Scenario 5 (external drop and recreate table): query sees new empty table.
+ *
+ * Scenario 1 includes a `cachingcat` variant to verify the two-layer REFRESH behavior:
+ * [[org.apache.spark.sql.execution.datasources.v2.RefreshTableExec]] calls both
+ * `invalidateTable` (clearing the connector cache) and the CacheManager rebuild, so external
+ * writes become visible after REFRESH even with a caching connector. Scenarios 2 through 4
+ * omit `cachingcat` because the CacheManager pins reads regardless of the connector, making
+ * the observable behavior the same. Scenario 5 (drop and recreate) includes a `cachingcat`
+ * variant because it differs: [[CachingInMemoryTableCatalog]] does not invalidate on
+ * drop/create, so `loadTable` still returns the old cached table object, CacheManager still
+ * matches, and stale data is served until REFRESH TABLE.
+ *
+ * Only external mutations are tested. Session DROP TABLE automatically uncaches the table
+ * (via the CacheManager), making a session drop+recreate scenario trivially different from
+ * the external variant.
+ *
+ * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames
+ * are lazy and require an action to trigger execution. In classic mode `.collect()` on
+ * DDL / DML is a no-op (these execute eagerly), so this is harmless.
+ */
+trait DSv2CacheTableReadTests extends DSv2ExternalMutationTestBase {
+
+  private def assertTableCached(session: SparkSession, tableName: String): Unit =
+    assert(session.catalog.isCached(tableName))
+
+  test(s"${testPrefix}SPARK-54022: cached table pinned against external data write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        session.table(testTable).cache()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        session.sql(s"REFRESH TABLE $testTable").collect()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54022: connector w/ cache: cached table pinned, " +
+      "REFRESH clears both layers") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect()
+
+        session.table(cachingTestTable).cache()
+        assertTableCached(session, cachingTestTable)
+        checkRows(session.table(cachingTestTable), Seq(Row(1, 100)))
+
+        val catalog =
+          getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        // Both CacheManager and connector cache are stale: external write invisible
+        assertTableCached(session, cachingTestTable)
+        checkRows(session.table(cachingTestTable), Seq(Row(1, 100)))
+
+        // REFRESH TABLE calls invalidateTable (clears connector cache) and rebuilds
+        // the CacheManager entry, so the external write becomes visible.
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        assertTableCached(session, cachingTestTable)
+        checkRows(session.table(cachingTestTable), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54022: session write invalidates cache, " +
+      "then external write invisible") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        session.table(testTable).cache()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100), Row(2, 200)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(3, 300))
+
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100), Row(2, 200)))
+
+        session.sql(s"REFRESH TABLE $testTable").collect()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100), Row(2, 200), Row(3, 300)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54022: cached table pinned against external schema change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        session.table(testTable).cache()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val addCol = TableChange.addColumn(Array("new_column"), IntegerType, true)
+        catalog.alterTable(testIdent, addCol)
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        session.sql(s"REFRESH TABLE $testTable").collect()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100, null), Row(2, 200, -1)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54022: session schema change invalidates cache, " +
+      "external write invisible") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        session.table(testTable).cache()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        session.sql(s"ALTER TABLE $testTable ADD COLUMN new_column INT").collect()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100, null)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100, null)))
+
+        session.sql(s"REFRESH TABLE $testTable").collect()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100, null), Row(2, 200, -1)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54022: cached table after external drop and " +
+      "recreate sees empty table") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        session.table(testTable).cache()
+        assertTableCached(session, testTable)
+        checkRows(session.table(testTable), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val originalTableId = catalog.loadTable(testIdent).id
+
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+
+        val newTableId = catalog.loadTable(testIdent).id
+        assert(originalTableId != newTableId)
+
+        val result = session.table(testTable)
+        assert(result.schema.fieldNames.toSeq == Seq("id", "salary"))
+        checkRows(result, Seq.empty)
+
+        // External drop+recreate produces a new table identity, so the prior cache entry
+        // is unreachable via name lookup (unlike external write/schema change where the
+        // cache stays pinned).
+        assert(!session.catalog.isCached(testTable))
+
+        session.sql(s"REFRESH TABLE $testTable").collect()
+        checkRows(session.table(testTable), Seq.empty)
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54022: connector w/ cache: cached table stale after " +
+      "external drop and recreate") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect()
+
+        session.table(cachingTestTable).cache()
+        assertTableCached(session, cachingTestTable)
+        checkRows(session.table(cachingTestTable), Seq(Row(1, 100)))
+
+        val catalog =
+          getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val originalTableId = catalog.loadTable(testIdent).id
+
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+
+        // CachingInMemoryTableCatalog does not invalidate on drop/create, so loadTable
+        // still returns the old cached table object. CacheManager still matches and
+        // serves the stale cached data.
+        assertTableCached(session, cachingTestTable)
+        checkRows(session.table(cachingTestTable), Seq(Row(1, 100)))
+
+        // REFRESH TABLE calls invalidateTable (clears connector cache) and rebuilds
+        // the CacheManager entry, so the new empty table becomes visible.
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.table(cachingTestTable), Seq.empty)
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala
new file mode 100644
index 0000000000000..0b2a50534447c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{BufferedRows, CatalogV2Util, Identifier, InMemoryBaseTable, TableCatalog, TableWritePrivilege}
+
+/**
+ * Base trait for DSv2 tests that involve external table mutations (writes, schema changes,
+ * drop/recreate) via the catalog API.
+ *
+ * Provides abstract methods so that the same test scenarios can run in both classic mode
+ * (where the test session IS the server session) and Connect mode (where the test session
+ * is a Connect client and catalog access requires the server session).
+ *
+ * Concrete suites override the abstract methods and mix in a test trait such as
+ * [[DSv2TempViewWithStoredPlanTests]], [[DSv2RepeatedTableAccessTests]],
+ * [[DSv2IncrementallyConstructedQueryTests]], or [[DSv2CacheTableReadTests]].
+ */
+trait DSv2ExternalMutationTestBase extends QueryTest {
+
+  /** Fully qualified table name under the non-caching test catalog. */
+  protected val testTable: String = "testcat.ns1.ns2.tbl"
+
+  /** Fully qualified table name under the caching test catalog. */
+  protected val cachingTestTable: String = "cachingcat.ns1.ns2.tbl"
+
+  /** Identifier for the test table within its namespace. */
+  protected val testIdent: Identifier = Identifier.of(Array("ns1", "ns2"), "tbl")
+
+  /** Prefix for test names, e.g. "" or "[connect] ". */
+  protected def testPrefix: String
+
+  /** Whether this suite runs under Spark Connect. */
+  protected def isConnect: Boolean
+
+  /** Execute a test body with a session. */
+  protected def withTestSession(fn: SparkSession => Unit): Unit
+
+  /**
+   * Assert that a DataFrame's rows match the expected rows (order-agnostic).
+   */
+  protected def checkRows(df: => DataFrame, expected: Seq[Row]): Unit
+
+  /**
+   * Get a [[TableCatalog]] by name from the underlying session.
+   */
+  protected def getTableCatalog[C <: TableCatalog: ClassTag](
+      session: SparkSession,
+      catalogName: String): C
+
+  /** Cleanup wrapper: drop views and the table after the test body, even on failure. */
+  protected def withTestTableAndViews(
+      session: SparkSession,
+      table: String,
+      views: Seq[String] = Seq.empty)(fn: => Unit): Unit
+
+  /** Appends a row to a DSv2 table via the catalog API, bypassing the session. */
+  protected def externalAppend(
+      catalog: TableCatalog,
+      ident: Identifier,
+      row: InternalRow): Unit = {
+    val extTable = catalog
+      .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT))
+      .asInstanceOf[InMemoryBaseTable]
+    val schema = CatalogV2Util.v2ColumnsToStructType(extTable.columns())
+    extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(row)))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2IncrementallyConstructedQueryTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2IncrementallyConstructedQueryTests.scala
new file mode 100644
index 0000000000000..1dbaad18e3e71
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2IncrementallyConstructedQueryTests.scala
@@ -0,0 +1,495 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{Column, InMemoryTableCatalog, TableCatalog, TableChange, TableInfo}
+import org.apache.spark.sql.types.{IntegerType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Tests for incrementally constructed queries where df1 and df2 are analyzed at different
+ * times, then joined. The refresh phase in QueryExecution must align table versions across
+ * all references.
+ *
+ * Classic and Connect modes produce different results in some scenarios because in Connect
+ * mode, resolution is deferred until execution, so both sides of a join always see the
+ * latest table state.
+ *
+ * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames
+ * are lazy and require an action to trigger execution. In classic mode `.collect()` on
+ * eager statements (DDL, INSERT) is a no-op, so this is harmless.
+ */
+trait DSv2IncrementallyConstructedQueryTests extends DSv2ExternalMutationTestBase {
+
+  // ---------------------------------------------------------------------------
+  // Scenario 1: join after insert refreshes both sides to latest version.
+  // Both classic and Connect see the inserted data.
+  // ---------------------------------------------------------------------------
+
+  test(s"${testPrefix}SPARK-54157: join refreshes both sides after external insert" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        val df2 = session.table(testTable)
+
+        checkRows(
+          df1.join(df2, df1("id") === df2("id")),
+          Seq(Row(1, 100, 1, 100), Row(2, 200, 2, 200)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54157: join refreshes both sides after same-session insert" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
+
+        val df2 = session.table(testTable)
+
+        checkRows(
+          df1.join(df2, df1("id") === df2("id")),
+          Seq(Row(1, 100, 1, 100), Row(2, 200, 2, 200)))
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Scenario 2: join after ADD COLUMN.
+  // Classic: df1 keeps its original 2-column schema.
+  // Connect: re-resolves df1 with the new 3-column schema.
+  // ---------------------------------------------------------------------------
+
+  test(s"${testPrefix}SPARK-54157: join after external ADD COLUMN" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        catalog.alterTable(
+          testIdent, TableChange.addColumn(Array("new_column"), IntegerType, true))
+        externalAppend(
+          catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        val df2 = session.table(testTable)
+        val selfJoin = df1.join(df2, df1("id") === df2("id"))
+
+        if (isConnect) {
+          // Connect re-resolves df1 with the new 3-column schema (id, salary, new_column).
+          assert(selfJoin.columns.length == 6,
+            s"Expected 6 columns (3 + 3) but got: ${selfJoin.columns.mkString(", ")}")
+          checkRows(selfJoin,
+            Seq(Row(1, 100, null, 1, 100, null), Row(2, 200, -1, 2, 200, -1)))
+        } else {
+          // Classic: df1 keeps its original 2-column schema (id, salary).
+          assert(selfJoin.columns.length == 5,
+            s"Expected 5 columns (2 + 3) but got: ${selfJoin.columns.mkString(", ")}")
+          checkRows(selfJoin,
+            Seq(Row(1, 100, 1, 100, null), Row(2, 200, 2, 200, -1)))
+        }
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54157: join after same-session ADD COLUMN" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        session.sql(s"ALTER TABLE $testTable ADD COLUMN new_column INT").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200, -1)").collect()
+
+        val df2 = session.table(testTable)
+        val selfJoin = df1.join(df2, df1("id") === df2("id"))
+
+        if (isConnect) {
+          // Connect re-resolves df1 with the new 3-column schema (id, salary, new_column).
+          assert(selfJoin.columns.length == 6,
+            s"Expected 6 columns (3 + 3) but got: ${selfJoin.columns.mkString(", ")}")
+          checkRows(selfJoin,
+            Seq(Row(1, 100, null, 1, 100, null), Row(2, 200, -1, 2, 200, -1)))
+        } else {
+          // Classic: df1 keeps its original 2-column schema (id, salary).
+          assert(selfJoin.columns.length == 5,
+            s"Expected 5 columns (2 + 3) but got: ${selfJoin.columns.mkString(", ")}")
+          checkRows(selfJoin,
+            Seq(Row(1, 100, 1, 100, null), Row(2, 200, 2, 200, -1)))
+        }
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Scenario 3: join after DROP COLUMN.
+  // Classic: df1 references the dropped column, fails with COLUMNS_MISMATCH.
+  // Connect: re-resolves df1 without the dropped column, join succeeds.
+  // ---------------------------------------------------------------------------
+
+  test(s"${testPrefix}SPARK-54157: join after external DROP COLUMN" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        catalog.alterTable(
+          testIdent, TableChange.deleteColumn(Array("salary"), false))
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2))
+
+        val df2 = session.table(testTable)
+
+        if (isConnect) {
+          // Connect re-resolves df1 without the dropped column.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(1, 1), Row(2, 2)))
+        } else {
+          // Classic: df1 references the dropped column.
+          checkError(
+            exception = intercept[AnalysisException] {
+              df1.join(df2, df1("id") === df2("id")).collect()
+            },
+            condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+            matchPVals = true,
+            parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+        }
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54157: join after same-session DROP COLUMN" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (2)").collect()
+
+        val df2 = session.table(testTable)
+
+        if (isConnect) {
+          // Connect re-resolves df1 without the dropped column.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(1, 1), Row(2, 2)))
+        } else {
+          // Classic: df1 references the dropped column.
+          checkError(
+            exception = intercept[AnalysisException] {
+              df1.join(df2, df1("id") === df2("id")).collect()
+            },
+            condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+            matchPVals = true,
+            parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+        }
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Scenario 4: external drop and recreate table.
+  // 4a: table ID detects it, TABLE_ID_MISMATCH in classic, succeeds in Connect
+  // 4b: column IDs detect it, COLUMN_ID_MISMATCH in classic, succeeds in Connect
+  // 4c: no IDs, goes undetected, join succeeds (both modes)
+  // ---------------------------------------------------------------------------
+
+  test(s"${testPrefix}SPARK-54157: join after external table drop and recreate" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val originTableId = catalog.loadTable(testIdent).id
+
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        val df2 = session.table(testTable)
+        val newTableId = catalog.loadTable(testIdent).id
+        assert(originTableId != newTableId)
+
+        if (isConnect) {
+          // Connect re-resolves both sides to the recreated table.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(2, 200, 2, 200)))
+        } else {
+          // Classic: table ID changed.
+          checkError(
+            exception = intercept[AnalysisException] {
+              df1.join(df2, df1("id") === df2("id")).collect()
+            },
+            condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.TABLE_ID_MISMATCH",
+            matchPVals = true,
+            parameters = Map(
+              "tableName" -> ".*",
+              "capturedTableId" -> ".*",
+              "currentTableId" -> ".*"))
+        }
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54157: join after external drop/recreate" +
+      " (table without table ID support, but with column ID support)") {
+    val nullIdT = "nullidcat.ns1.ns2.tbl"
+    withTestSession { session =>
+      withTestTableAndViews(session, nullIdT) {
+        session.sql(s"CREATE TABLE $nullIdT (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $nullIdT VALUES (1, 100)").collect()
+
+        val df1 = session.table(nullIdT)
+        val catalog = getTableCatalog[TableCatalog](session, "nullidcat")
+        assert(catalog.loadTable(testIdent).id == null,
+          "NullTableIdInMemoryTableCatalog should produce null table IDs")
+
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        val df2 = session.table(nullIdT)
+
+        if (isConnect) {
+          // Connect re-resolves both sides to the recreated table.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(2, 200, 2, 200)))
+        } else {
+          // Classic: column IDs changed.
+          checkError(
+            exception = intercept[AnalysisException] {
+              df1.join(df2, df1("id") === df2("id")).collect()
+            },
+            condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+            matchPVals = true,
+            parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+        }
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54157: join does not detect external table drop and recreate" +
+      " (table without table ID support and without column ID support)") {
+    val nullBothT = "nullbothidscat.ns1.ns2.tbl"
+    withTestSession { session =>
+      withTestTableAndViews(session, nullBothT) {
+        session.sql(s"CREATE TABLE $nullBothT (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $nullBothT VALUES (1, 100)").collect()
+
+        val df1 = session.table(nullBothT)
+        val catalog = getTableCatalog[TableCatalog](
+          session, "nullbothidscat")
+        assert(catalog.loadTable(testIdent).id == null,
+          "NullTableIdAndNullColumnIdInMemoryTableCatalog should produce null table IDs")
+        assert(catalog.loadTable(testIdent).columns().forall(_.id() == null),
+          "NullTableIdAndNullColumnIdInMemoryTableCatalog should produce null column IDs")
+
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        val df2 = session.table(nullBothT)
+
+        if (isConnect) {
+          // Connect re-resolves both sides to the recreated table, so the join
+          // sees the row appended after recreate.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(2, 200, 2, 200)))
+        } else {
+          // Classic: neither TABLE_ID_MISMATCH nor COLUMN_ID_MISMATCH fires, so the
+          // drop and recreate goes undetected. df1 keeps its pre-drop snapshot
+          // (1, 100) while df2 reads the recreated table (2, 200), so the join finds
+          // no matching ids and returns no rows.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq.empty)
+        }
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Scenario 5: external drop+re-add column.
+  // 5a: column IDs detect it, COLUMN_ID_MISMATCH in classic, succeeds in Connect
+  // 5b: no IDs, goes undetected, join succeeds (both modes)
+  // ---------------------------------------------------------------------------
+
+  test(s"${testPrefix}SPARK-54157: join after external drop+re-add column" +
+      " (table without table ID support, but with column ID support)") {
+    val nullIdT = "nullidcat.ns1.ns2.tbl"
+    withTestSession { session =>
+      withTestTableAndViews(session, nullIdT) {
+        session.sql(s"CREATE TABLE $nullIdT (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $nullIdT VALUES (1, 100)").collect()
+
+        val df1 = session.table(nullIdT)
+
+        val catalog = getTableCatalog[TableCatalog](session, "nullidcat")
+        catalog.alterTable(
+          testIdent, TableChange.deleteColumn(Array("salary"), false))
+        catalog.alterTable(
+          testIdent, TableChange.addColumn(Array("salary"), IntegerType, true))
+
+        val df2 = session.table(nullIdT)
+
+        if (isConnect) {
+          // Connect re-resolves both sides with the new column ID.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(1, null, 1, null)))
+        } else {
+          // Classic: column ID changed.
+          checkError(
+            exception = intercept[AnalysisException] {
+              df1.join(df2, df1("id") === df2("id")).collect()
+            },
+            condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+            matchPVals = true,
+            parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+        }
+      }
+    }
+  }
+
+  test(s"${testPrefix}SPARK-54157: join does not detect external drop+re-add column" +
+      " (table without table ID support and without column ID support)") {
+    val nullBothT = "nullbothidscat.ns1.ns2.tbl"
+    withTestSession { session =>
+      withTestTableAndViews(session, nullBothT) {
+        session.sql(s"CREATE TABLE $nullBothT (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $nullBothT VALUES (1, 100)").collect()
+
+        val df1 = session.table(nullBothT)
+
+        val catalog = getTableCatalog[TableCatalog](
+          session, "nullbothidscat")
+        catalog.alterTable(
+          testIdent, TableChange.deleteColumn(Array("salary"), false))
+        catalog.alterTable(
+          testIdent, TableChange.addColumn(Array("salary"), IntegerType, true))
+
+        val df2 = session.table(nullBothT)
+
+        // Neither TABLE_ID_MISMATCH nor COLUMN_ID_MISMATCH fires.
+        // The change goes undetected and the join succeeds.
+        checkRows(
+          df1.join(df2, df1("id") === df2("id")),
+          Seq(Row(1, null, 1, null)))
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Scenario 6: external type change (drop INT column, add STRING column).
+  // The delete removes the old column ID and the add assigns a fresh one,
+  // so the column ID check fires (COLUMN_ID_MISMATCH) in classic before schema
+  // validation gets a chance to compare data types.
+  // Connect re-resolves both sides with the new column ID.
+  // ---------------------------------------------------------------------------
+
+  test(s"${testPrefix}SPARK-54157: join after external drop+re-add different-type column" +
+      " (table with both table and column ID support)") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+
+        val df1 = session.table(testTable)
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        catalog.alterTable(
+          testIdent, TableChange.deleteColumn(Array("salary"), false))
+        catalog.alterTable(
+          testIdent, TableChange.addColumn(Array("salary"), StringType, true))
+        externalAppend(catalog = catalog, ident = testIdent,
+          row = InternalRow(2, UTF8String.fromString("high")))
+
+        val df2 = session.table(testTable)
+
+        if (isConnect) {
+          // Connect re-resolves both sides with the new column type.
+          checkRows(
+            df1.join(df2, df1("id") === df2("id")),
+            Seq(Row(1, null, 1, null), Row(2, "high", 2, "high")))
+        } else {
+          // Classic: column ID changed.
+          checkError(
+            exception = intercept[AnalysisException] {
+              df1.join(df2, df1("id") === df2("id")).collect()
+            },
+            condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+            matchPVals = true,
+            parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+        }
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala
new file mode 100644
index 0000000000000..533d10a949796
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, InMemoryTableCatalog, TableChange, TableInfo}
+import org.apache.spark.sql.types.IntegerType
+
+/**
+ * Shared repeated table access tests with external changes for DSv2 tables. These tests verify
+ * that repeated `sql()` calls correctly reflect both session and external mutations:
+ *
+ *  - Scenario 1 (external writes): external data appended via the catalog API is visible.
+ *  - Scenario 2 (external schema changes): external ADD COLUMN via the catalog API is visible.
+ *  - Scenario 3 (external drop/recreate): external drop and recreate via the catalog API
+ *    resolves to the new empty table.
+ *
+ * Each scenario includes a session mutation baseline, an external mutation test, and a
+ * caching-connector variant showing stale results until `REFRESH TABLE`.
+ *
+ * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames
+ * are lazy and require an action to trigger execution. In classic mode `.collect()` on
+ * DDL / DML is a no-op (these execute eagerly), so this is harmless.
+ */
+trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase {
+
+  // Uses testTable, cachingTestTable, and testIdent from DSv2ExternalMutationTestBase.
+
+  // Scenario 1: data changes via writes
+
+  test(s"${testPrefix}repeated sql() reflects session write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))
+
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}repeated sql() reflects external write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}connector w/ cache: repeated sql() stale after external write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        // Caching connector returns stale table: external write invisible
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, external write becomes visible
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 2: schema changes
+
+  test(s"${testPrefix}repeated sql() reflects session schema change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))
+
+        session.sql(s"ALTER TABLE $testTable ADD COLUMN new_col INT").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200, -1)").collect()
+        checkRows(
+          session.sql(s"SELECT * FROM $testTable"),
+          Seq(Row(1, 100, null), Row(2, 200, -1)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}repeated sql() reflects external schema change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true)
+        catalog.alterTable(testIdent, addCol)
+
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        checkRows(
+          session.sql(s"SELECT * FROM $testTable"),
+          Seq(Row(1, 100, null), Row(2, 200, -1)))
+      }
+    }
+  }
+
+  test(s"${testPrefix}connector w/ cache: repeated sql() stale after external schema change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true)
+        catalog.alterTable(testIdent, addCol)
+
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        // Caching connector returns stale table: external changes invisible
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, schema change + data visible
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(
+          session.sql(s"SELECT * FROM $cachingTestTable"),
+          Seq(Row(1, 100, null), Row(2, 200, -1)))
+      }
+    }
+  }
+
+  // Scenario 3: drop and recreate table
+
+  test(s"${testPrefix}repeated sql() reflects session drop/recreate") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))
+
+        session.sql(s"DROP TABLE $testTable").collect()
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq.empty)
+      }
+    }
+  }
+
+  test(s"${testPrefix}repeated sql() reflects external drop/recreate") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+
+        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq.empty)
+      }
+    }
+  }
+
+  test(s"${testPrefix}connector w/ cache: repeated sql() stale after external drop/recreate") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect()
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+
+        // Caching connector returns stale table: drop/recreate invisible
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, new empty table visible
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq.empty)
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala
new file mode 100644
index 0000000000000..9f8a93e30550f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala
@@ -0,0 +1,586 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, InMemoryTableCatalog, TableChange, TableInfo}
+import org.apache.spark.sql.types.{IntegerType, LongType, StringType}
+
+/**
+ * Shared temp view with stored plan tests for DSv2 tables. These tests verify that temp views
+ * backed by DSv2 tables correctly handle data changes, schema changes, and table recreation,
+ * both via session SQL and external catalog mutations.
+ *
+ * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames
+ * are lazy and require an action to trigger execution. In classic mode `.collect()` on DDL
+ * is a no-op (DDL executes eagerly), so this is harmless.
+ */
+trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase {
+
+  // Scenario 1.1 (session write)
+  test(s"${testPrefix}temp view with stored plan reflects session write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
+        checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 1.2 (external write)
+  test(s"${testPrefix}temp view with stored plan reflects external write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 1.2 connector w/ cache (external write, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external write") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))
+
+        // Caching connector returns stale table: external write invisible
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, external write becomes visible
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 2.1 (session ADD COLUMN)
+  test(s"${testPrefix}temp view with stored plan preserves schema after session ADD COLUMN") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        session.sql(s"ALTER TABLE $testTable ADD COLUMN new_column INT").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200, -1)").collect()
+
+        // view preserves original 2-column schema, filter still applied
+        checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 2.2 (external ADD COLUMN)
+  test(s"${testPrefix}temp view with stored plan preserves schema after external ADD COLUMN") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // external schema change via catalog API
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val addCol = TableChange.addColumn(Array("new_column"), IntegerType, true)
+        catalog.alterTable(testIdent, addCol)
+
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        // view preserves original 2-column schema, filter still applied
+        checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 2.2 connector w/ cache (external ADD COLUMN, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external ADD COLUMN") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val addCol = TableChange.addColumn(Array("new_column"), IntegerType, true)
+        catalog.alterTable(testIdent, addCol)
+
+        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))
+
+        // Caching connector returns stale table: external changes invisible
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, view preserves original 2-column schema
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 3.1 (session column removal)
+  test(s"${testPrefix}temp view with stored plan detects session column removal") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect()
+
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` INT has been removed"))
+      }
+    }
+  }
+
+  // Scenario 3.2 (external column removal)
+  test(s"${testPrefix}temp view with stored plan detects external column removal") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val dropCol = TableChange.deleteColumn(Array("salary"), false)
+        catalog.alterTable(testIdent, dropCol)
+
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` INT has been removed"))
+      }
+    }
+  }
+
+  // Scenario 3.2 connector w/ cache (external column removal, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external column removal") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val dropCol = TableChange.deleteColumn(Array("salary"), false)
+        catalog.alterTable(testIdent, dropCol)
+
+        // Caching connector returns stale table: column removal invisible, no error
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, column removal detected
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`cachingcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` INT has been removed"))
+      }
+    }
+  }
+
+  // Scenario 4.1 (session drop and recreate table)
+  test(s"${testPrefix}temp view with stored plan resolves to session-recreated table") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val originalTableId = catalog.loadTable(testIdent).id
+
+        session.sql(s"DROP TABLE $testTable").collect()
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+
+        val newTableId = catalog.loadTable(testIdent).id
+        assert(originalTableId != newTableId)
+
+        // view resolves to the new empty table
+        checkRows(session.table("v"), Seq.empty)
+
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
+        checkRows(session.table("v"), Seq(Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 4.2 (external drop and recreate table)
+  test(s"${testPrefix}temp view with stored plan resolves to externally recreated table") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val originalTableId = catalog.loadTable(testIdent).id
+
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+
+        val newTableId = catalog.loadTable(testIdent).id
+        assert(originalTableId != newTableId)
+
+        // view resolves to the new empty table
+        checkRows(session.table("v"), Seq.empty)
+
+        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
+        checkRows(session.table("v"), Seq(Row(2, 200)))
+      }
+    }
+  }
+
+  // Scenario 4.2 connector w/ cache (external drop/recreate, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external drop/recreate") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        catalog.dropTable(testIdent)
+        catalog.createTable(
+          testIdent,
+          new TableInfo.Builder()
+            .withColumns(Array(
+              Column.create("id", IntegerType),
+              Column.create("salary", IntegerType)))
+            .build())
+
+        // Caching connector returns stale table: drop/recreate invisible
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, view resolves to new empty table
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.table("v"), Seq.empty)
+      }
+    }
+  }
+
+  // Scenario 5.1 (session drop and re-add column with same type, multiple views)
+  test(s"${testPrefix}temp view with stored plan after session drop and re-add column same type" +
+      " with unfiltered view") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v", "v_no_filter", "v_filter_is_null")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        session.table(testTable).createOrReplaceTempView("v_no_filter")
+        session.table(testTable).filter("salary IS NULL")
+          .createOrReplaceTempView("v_filter_is_null")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+        checkRows(session.table("v_no_filter"), Seq(Row(1, 100), Row(10, 1000)))
+        checkRows(session.table("v_filter_is_null"), Seq.empty)
+
+        // drop and re-add column with same name and type
+        session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect()
+        session.sql(s"ALTER TABLE $testTable ADD COLUMN salary INT").collect()
+
+        // salary values are now null, so the filtered view returns nothing
+        checkRows(session.table("v"), Seq.empty)
+        // unfiltered view returns rows with null salary
+        checkRows(session.table("v_no_filter"), Seq(Row(1, null), Row(10, null)))
+        // IS NULL filter now matches all rows
+        checkRows(session.table("v_filter_is_null"), Seq(Row(1, null), Row(10, null)))
+      }
+    }
+  }
+
+  // Scenario 5.2 (external drop and re-add column with same type)
+  test(s"${testPrefix}temp view with stored plan after external drop and re-add column " +
+      "same type") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v", "v_no_filter", "v_filter_is_null")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        session.table(testTable).createOrReplaceTempView("v_no_filter")
+        session.table(testTable).filter("salary IS NULL")
+          .createOrReplaceTempView("v_filter_is_null")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+        checkRows(session.table("v_no_filter"), Seq(Row(1, 100), Row(10, 1000)))
+        checkRows(session.table("v_filter_is_null"), Seq.empty)
+
+        // external drop and re-add column via catalog API
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val dropCol = TableChange.deleteColumn(Array("salary"), false)
+        val addCol = TableChange.addColumn(Array("salary"), IntegerType, true)
+        catalog.alterTable(testIdent, dropCol, addCol)
+
+        // salary values are now null, so the filtered view returns nothing
+        checkRows(session.table("v"), Seq.empty)
+        // unfiltered view returns rows with null salary
+        checkRows(session.table("v_no_filter"), Seq(Row(1, null), Row(10, null)))
+        // IS NULL filter now matches all rows
+        checkRows(session.table("v_filter_is_null"), Seq(Row(1, null), Row(10, null)))
+      }
+    }
+  }
+
+  // Scenario 5.2 connector w/ cache (external drop/re-add column, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external drop/re-add column " +
+      "same type") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val dropCol = TableChange.deleteColumn(Array("salary"), false)
+        val addCol = TableChange.addColumn(Array("salary"), IntegerType, true)
+        catalog.alterTable(testIdent, dropCol, addCol)
+
+        // Caching connector returns stale table: column drop/re-add invisible
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, salary values are null
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkRows(session.table("v"), Seq.empty)
+      }
+    }
+  }
+
+  // Scenario 6.1 (session drop and re-add column with different type)
+  test(s"${testPrefix}temp view with stored plan detects session column type change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect()
+        session.sql(s"ALTER TABLE $testTable ADD COLUMN salary STRING").collect()
+
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` type has changed from INT to STRING"))
+      }
+    }
+  }
+
+  // Scenario 6.2 (external drop and re-add column with different type)
+  test(s"${testPrefix}temp view with stored plan detects external column type change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val dropCol = TableChange.deleteColumn(Array("salary"), false)
+        val addCol = TableChange.addColumn(Array("salary"), StringType, true)
+        catalog.alterTable(testIdent, dropCol, addCol)
+
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` type has changed from INT to STRING"))
+      }
+    }
+  }
+
+  // Scenario 6.2 connector w/ cache (external column type change, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external column type change") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val dropCol = TableChange.deleteColumn(Array("salary"), false)
+        val addCol = TableChange.addColumn(Array("salary"), StringType, true)
+        catalog.alterTable(testIdent, dropCol, addCol)
+
+        // Caching connector returns stale table: type change invisible, no error
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, type change detected
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`cachingcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` type has changed from INT to STRING"))
+      }
+    }
+  }
+
+  // Scenario 7.1 (session type widening from INT to BIGINT)
+  test(s"${testPrefix}temp view with stored plan detects session type widening") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        session.sql(s"ALTER TABLE $testTable ALTER COLUMN salary TYPE LONG").collect()
+
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` type has changed from INT to BIGINT"))
+      }
+    }
+  }
+
+  // Scenario 7.2 (external type widening from INT to BIGINT)
+  test(s"${testPrefix}temp view with stored plan detects external type widening") {
+    withTestSession { session =>
+      withTestTableAndViews(session, testTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(testTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
+        val updateType = TableChange.updateColumnType(Array("salary"), LongType)
+        catalog.alterTable(testIdent, updateType)
+
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` type has changed from INT to BIGINT"))
+      }
+    }
+  }
+
+  // Scenario 7.2 connector w/ cache (external type widening, caching connector)
+  test(s"${testPrefix}connector w/ cache: temp view stale after external type widening") {
+    withTestSession { session =>
+      withTestTableAndViews(session, cachingTestTable, Seq("v")) {
+        session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect()
+        session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect()
+
+        session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v")
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
+        val updateType = TableChange.updateColumnType(Array("salary"), LongType)
+        catalog.alterTable(testIdent, updateType)
+
+        // Caching connector returns stale table: type change invisible, no error
+        checkRows(session.table("v"), Seq(Row(1, 100)))
+
+        // REFRESH TABLE invalidates the connector cache, type change detected
+        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
+        checkError(
+          exception = intercept[AnalysisException] { session.table("v").collect() },
+          condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+          parameters = Map(
+            "viewName" -> "`v`",
+            "tableName" -> "`cachingcat`.`ns1`.`ns2`.`tbl`",
+            "colType" -> "data",
+            "errors" -> "- `salary` type has changed from INT to BIGINT"))
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala
index 4db59b36c1fec..97cdebe2d32df 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala
@@ -181,7 +181,7 @@ object InMemoryTableSessionCatalog {
 
 private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2SessionCatalogBase[T]]
   extends SharedSparkSession
-  with BeforeAndAfter {
+  with BeforeAndAfter { self: InsertIntoSQLOnlyTests =>
 
   protected def catalog(name: String): CatalogPlugin = {
     spark.sessionState.catalogManager.catalog(name)
@@ -215,6 +215,7 @@ private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2Sessio
     val t1 = "tbl"
     val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
     df.write.format(v2Format).saveAsTable(t1)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df)
   }
 
@@ -222,6 +223,7 @@ private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2Sessio
     val t1 = "tbl"
     val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
     df.write.format(v2Format).mode("append").saveAsTable(t1)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df)
   }
 
@@ -245,10 +247,12 @@ private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2Sessio
       df.select("id", "data").write.format(v2Format).saveAsTable(t1)
     }
     df.write.format(v2Format).mode("append").saveAsTable(t1)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df)
 
     // Check that appends are by name
     df.select($"data", $"id").write.format(v2Format).mode("append").saveAsTable(t1)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df.union(df))
   }
 
@@ -284,6 +288,7 @@ private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2Sessio
     val t1 = "tbl"
     val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
     df.write.format(v2Format).mode("ignore").saveAsTable(t1)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df)
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala
index eadaafea81a53..f272f28a5f92f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala
@@ -21,13 +21,14 @@ import java.util
 import java.util.Collections
 
 import scala.jdk.CollectionConverters._
+import scala.reflect.ClassTag
 
 import org.apache.spark.{SparkConf, SparkException}
-import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SaveMode}
+import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.QueryTest.withQueryExecutionsCaptured
 import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CreateTableAsSelect, LogicalPlan, ReplaceTableAsSelect}
-import org.apache.spark.sql.connector.catalog.{Column, ColumnDefaultValue, DefaultValue, Identifier, InMemoryTableCatalog, SupportsV1OverwriteWithSaveAsTable, TableInfo}
+import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, ColumnDefaultValue, ComposedColumnIdTableCatalog, DefaultValue, Identifier, InMemoryTableCatalog, MixedColumnIdTableCatalog, NullColumnIdInMemoryTableCatalog, NullTableIdAndNullColumnIdInMemoryTableCatalog, NullTableIdInMemoryTableCatalog, SupportsV1OverwriteWithSaveAsTable, TableCatalog, TableInfo, TypeChangeResetsColIdTableCatalog}
 import org.apache.spark.sql.connector.catalog.BasicInMemoryTableCatalog
 import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, UpdateColumnDefaultValue}
 import org.apache.spark.sql.connector.catalog.TableChange
@@ -38,14 +39,18 @@ import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTru
 import org.apache.spark.sql.execution.{QueryExecution, SparkPlan}
 import org.apache.spark.sql.execution.ExplainUtils.stripAQEPlan
 import org.apache.spark.sql.execution.datasources.v2.{AlterTableExec, CreateTableExec, DataSourceV2Relation, ReplaceTableExec}
-import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.functions.{lit, sum}
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{BooleanType, CalendarIntervalType, DoubleType, IntegerType, StringType, TimestampType}
+import org.apache.spark.sql.types.{BooleanType, CalendarIntervalType, DoubleType, IntegerType, LongType, StringType, StructType, TimestampType}
 import org.apache.spark.sql.util.QueryExecutionListener
 import org.apache.spark.unsafe.types.UTF8String
 
 class DataSourceV2DataFrameSuite
-  extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) {
+  extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false)
+  with DSv2TempViewWithStoredPlanTests
+  with DSv2RepeatedTableAccessTests
+  with DSv2IncrementallyConstructedQueryTests
+  with DSv2CacheTableReadTests {
   import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
   import testImplicits._
 
@@ -54,8 +59,30 @@ class DataSourceV2DataFrameSuite
     .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName)
     .set("spark.sql.catalog.testcat.copyOnLoad", "true")
     .set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.cachingcat",
+      classOf[CachingInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.cachingcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.nullidcat",
+      classOf[NullTableIdInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.nullidcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.nullcolidcat",
+      classOf[NullColumnIdInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.nullcolidcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.nullbothidscat",
+      classOf[NullTableIdAndNullColumnIdInMemoryTableCatalog].getName)
+    .set("spark.sql.catalog.nullbothidscat.copyOnLoad", "true")
+    .set("spark.sql.catalog.resetidcat",
+      classOf[TypeChangeResetsColIdTableCatalog].getName)
+    .set("spark.sql.catalog.resetidcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.mixedcolidcat",
+      classOf[MixedColumnIdTableCatalog].getName)
+    .set("spark.sql.catalog.mixedcolidcat.copyOnLoad", "true")
+    .set("spark.sql.catalog.composedidcat",
+      classOf[ComposedColumnIdTableCatalog].getName)
+    .set("spark.sql.catalog.composedidcat.copyOnLoad", "true")
 
   after {
+    catalog("cachingcat").asInstanceOf[CachingInMemoryTableCatalog].clearCache()
     spark.sessionState.catalogManager.reset()
   }
 
@@ -67,6 +94,36 @@ class DataSourceV2DataFrameSuite
     catalog.asInstanceOf[InMemoryTableCatalog]
   }
 
+  // DSv2ExternalMutationTestBase implementations for classic mode
+  override protected def testPrefix: String = ""
+  override protected def isConnect: Boolean = false
+
+  override protected def withTestSession(fn: SparkSession => Unit): Unit = fn(spark)
+
+  override protected def checkRows(df: => DataFrame, expected: Seq[Row]): Unit =
+    checkAnswer(df, expected)
+
+  override protected def getTableCatalog[C <: TableCatalog: ClassTag](
+      session: SparkSession,
+      catalogName: String): C = {
+    val c = catalog(catalogName)
+    val ct = implicitly[ClassTag[C]]
+    require(
+      ct.runtimeClass.isInstance(c),
+      s"Expected ${ct.runtimeClass.getName} but got ${c.getClass.getName}")
+    c.asInstanceOf[C]
+  }
+
+  override protected def withTestTableAndViews(
+      session: SparkSession,
+      table: String,
+      views: Seq[String] = Seq.empty)(fn: => Unit): Unit = {
+    withTable(table) {
+      try { fn }
+      finally { views.foreach(v => session.sql(s"DROP VIEW IF EXISTS $v")) }
+    }
+  }
+
   override def verifyTable(tableName: String, expected: DataFrame): Unit = {
     checkAnswer(spark.table(tableName), expected)
   }
@@ -87,7 +144,9 @@ class DataSourceV2DataFrameSuite
       sql(s"CREATE TABLE $t2 (id bigint, data string) USING foo")
       val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
       df.write.insertInto(t1)
+      checkInsertMetrics(t1, numInsertedRows = 3)
       spark.table(t1).write.insertInto(t2)
+      checkInsertMetrics(t2, numInsertedRows = 3)
       checkAnswer(spark.table(t2), df)
     }
   }
@@ -97,6 +156,7 @@ class DataSourceV2DataFrameSuite
     withTable(t1) {
       val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
       df.write.saveAsTable(t1)
+      checkInsertMetrics(t1, numInsertedRows = 3)
       checkAnswer(spark.table(t1), df)
     }
   }
@@ -114,6 +174,7 @@ class DataSourceV2DataFrameSuite
 
       // appends are by name not by position
       df.select($"data", $"id").write.mode("append").saveAsTable(t1)
+      checkInsertMetrics(t1, numInsertedRows = 3)
       checkAnswer(spark.table(t1), df)
     }
   }
@@ -142,6 +203,7 @@ class DataSourceV2DataFrameSuite
     withTable(t1) {
       val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
       df.write.mode("ignore").saveAsTable(t1)
+      checkInsertMetrics(t1, numInsertedRows = 3)
       checkAnswer(spark.table(t1), df)
     }
   }
@@ -175,6 +237,7 @@ class DataSourceV2DataFrameSuite
 
       val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
       df.write.option("other", "20").mode("append").saveAsTable(t1)
+      checkInsertMetrics(t1, numInsertedRows = 3)
 
       sparkContext.listenerBus.waitUntilEmpty()
       plan match {
@@ -376,24 +439,29 @@ class DataSourceV2DataFrameSuite
 
       val df1 = Seq((1, "hr")).toDF("id", "dep")
       df1.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 1)
 
       sql(s"ALTER TABLE $tableName ADD COLUMN txt STRING DEFAULT 'initial-text'")
 
       val df2 = Seq((2, "hr"), (3, "software")).toDF("id", "dep")
       df2.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       sql(s"ALTER TABLE $tableName ALTER COLUMN txt SET DEFAULT 'new-text'")
 
       val df3 = Seq((4, "hr"), (5, "hr")).toDF("id", "dep")
       df3.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       val df4 = Seq((6, "hr", null), (7, "hr", "explicit-text")).toDF("id", "dep", "txt")
       df4.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       sql(s"ALTER TABLE $tableName ALTER COLUMN txt DROP DEFAULT")
 
       val df5 = Seq((8, "hr"), (9, "hr")).toDF("id", "dep")
       df5.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       checkAnswer(
         sql(s"SELECT * FROM $tableName"),
@@ -417,11 +485,13 @@ class DataSourceV2DataFrameSuite
 
       val df1 = Seq(1, 2).toDF("id")
       df1.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       sql(s"ALTER TABLE $tableName ALTER COLUMN dep SET DEFAULT 'it'")
 
       val df2 = Seq(3, 4).toDF("id")
       df2.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       checkAnswer(
         sql(s"SELECT * FROM $tableName"),
@@ -435,6 +505,7 @@ class DataSourceV2DataFrameSuite
 
       val df3 = Seq(1, 2).toDF("id")
       df3.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 2)
 
       checkAnswer(
         sql(s"SELECT * FROM $tableName"),
@@ -478,11 +549,13 @@ class DataSourceV2DataFrameSuite
 
       val df1 = Seq(1).toDF("id")
       df1.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 1)
 
       sql(s"ALTER TABLE $tableName ALTER COLUMN dep SET DEFAULT ('i' || 't')")
 
       val df2 = Seq(2).toDF("id")
       df2.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 1)
 
       checkAnswer(
         sql(s"SELECT * FROM $tableName"),
@@ -521,6 +594,7 @@ class DataSourceV2DataFrameSuite
 
       val df3 = Seq(1).toDF("id")
       df3.writeTo(tableName).append()
+      checkInsertMetrics(tableName, numInsertedRows = 1)
 
       checkAnswer(
         sql(s"SELECT * FROM $tableName"),
@@ -1286,13 +1360,34 @@ class DataSourceV2DataFrameSuite
       // remove nested field from struct column
       sql(s"ALTER TABLE $t DROP COLUMN person.age")
 
-      // execution should fail with column mismatch
+      // The standard InMemoryTableCatalog preserves column IDs across type
+      // changes. Dropping a nested field
+      // changes the parent struct type but keeps the same column ID, so
+      // schema validation catches the type mismatch.
       checkError(
         exception = intercept[AnalysisException] { df.collect() },
         condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
-        parameters = Map(
-          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
-          "errors" -> "- `person`.`age` INT has been removed"))
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("SPARK-54157: allow nested struct field addition after DataFrame analysis") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice'))")
+
+      // create DataFrame and trigger analysis
+      val df = spark.table(t)
+
+      // add nested field to struct column
+      sql(s"ALTER TABLE $t ADD COLUMN person.age INT")
+      sql(s"INSERT INTO $t VALUES (2, named_struct('name', 'Bob', 'age', 25))")
+
+      // stale DataFrame reads use ALLOW_NEW_FIELDS mode, so adding nested
+      // fields is permitted. The stale DataFrame reads the original columns.
+      checkAnswer(df, Seq(Row(1, Row("Alice")), Row(2, Row("Bob"))))
     }
   }
 
@@ -1506,289 +1601,1465 @@ class DataSourceV2DataFrameSuite
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table allows top-level column additions") {
+  // Column ID tests: Mismatch detection
+  //
+  // Core behavior: when a DataFrame captures column IDs at analysis time,
+  // and those IDs change before execution, the query is rejected with
+  // COLUMN_ID_MISMATCH.
+
+  test("drop+re-add column with same name and type rejects stale DataFrame") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
 
-      // create temp view using DataFrame API
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
-
-      // add top-level column to underlying table
-      sql(s"ALTER TABLE $t ADD COLUMN age int")
+      val df = spark.table(t)
 
-      // accessing temp view should succeed as top-level column additions are allowed
-      // view captures original columns
-      checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
 
-      // insert data to verify view still works correctly
-      sql(s"INSERT INTO $t VALUES (1, 'a', 25)")
-      checkAnswer(spark.table("v"), Seq(Row(1, "a")))
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table detects nested column additions") {
+  test("drop+re-add column with different type rejects stale DataFrame") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, address STRUCT<street: STRING, city: STRING>) USING foo")
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
 
-      // create temp view using DataFrame API
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
+      val df = spark.table(t)
 
-      // add nested column to underlying table
-      sql(s"ALTER TABLE $t ADD COLUMN address.zipCode string")
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary STRING")
 
-      // accessing temp view should detect schema change for nested additions
       checkError(
-        exception = intercept[AnalysisException] { spark.table("v").collect() },
-        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
-        parameters = Map(
-          "viewName" -> "`v`",
-          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
-          "colType" -> "data",
-          "errors" -> "- `address`.`zipCode` STRING has been added"))
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table detects removed columns") {
+  test("drop+re-add column with different case rejects stale DataFrame") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string, age int) USING foo")
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
 
-      // create temp view
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
+      val df = spark.table(t)
 
-      // drop column from underlying table
-      sql(s"ALTER TABLE $t DROP COLUMN age")
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN SALARY INT")
 
-      // accessing temp view should detect schema change
       checkError(
-        exception = intercept[AnalysisException] { spark.table("v").collect() },
-        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
-        parameters = Map(
-          "viewName" -> "`v`",
-          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
-          "colType" -> "data",
-          "errors" -> "- `age` INT has been removed"))
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table detects nullability changes") {
+  test("drop+re-add multiple columns reports all mismatches") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string NOT NULL) USING foo")
+      sql(s"CREATE TABLE $t (id INT, salary INT, bonus INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100, 10)")
 
-      // create temp view
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
+      val df = spark.table(t)
 
-      // change nullability constraint using ALTER TABLE
-      sql(s"ALTER TABLE $t ALTER COLUMN data DROP NOT NULL")
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t DROP COLUMN bonus")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+      sql(s"ALTER TABLE $t ADD COLUMN bonus INT")
 
-      // accessing temp view should detect schema change
       checkError(
-        exception = intercept[AnalysisException] { spark.table("v").collect() },
-        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
-        parameters = Map(
-          "viewName" -> "`v`",
-          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
-          "colType" -> "data",
-          "errors" -> "- `data` is nullable now"))
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*",
+          "errors" -> "(?s).*salary.*bonus.*"))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table accepts table ID changes") {
+  // With case-sensitive analysis, "salary" and "SALARY" are different columns.
+  // The column ID check skips (original "salary" not found in current table),
+  // and the schema check fires instead (column missing).
+  test("case-sensitive mode: different case column name triggers schema mismatch") {
     val t = "testcat.ns1.ns2.tbl"
-    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
-    withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
-      val df = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
-      df.write.insertInto(t)
-
-      // create temp view
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), df)
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      withTable(t) {
+        sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+        sql(s"INSERT INTO $t VALUES (1, 100)")
+        val df = spark.table(t)
 
-      // capture the original table ID
-      val originalTableId = catalog("testcat").loadTable(ident).id
+        sql(s"ALTER TABLE $t DROP COLUMN salary")
+        sql(s"ALTER TABLE $t ADD COLUMN SALARY INT")
 
-      // drop and recreate table (this changes the table ID)
-      sql(s"DROP TABLE $t")
-      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+        checkError(
+          exception = intercept[AnalysisException] { df.collect() },
+          condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+          matchPVals = true,
+          parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+      }
+    }
+  }
 
-      // verify table ID changed
-      val newTableId = catalog("testcat").loadTable(ident).id
-      assert(originalTableId != newTableId)
+  test("drop+re-add column with mixed case type rejects stale DataFrame") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+      val df = spark.table(t)
 
-      // accessing temp view should work despite table ID change (returns empty data)
-      checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary InT")
 
-      // insert new data and verify view reflects it
-      val newDF = Seq((3L, "c"), (4L, "d")).toDF("id", "data")
-      newDF.write.insertInto(t)
-      checkAnswer(spark.table("v"), newDF)
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("SPARK-53924: createOrReplaceTempView works after schema change") {
+  test("column addition does not trigger column ID mismatch") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data STRING, extra INT) USING foo")
-
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
-
-      // alter table
-      sql(s"ALTER TABLE $t DROP COLUMN extra")
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
 
-      // old view fails
-      intercept[AnalysisException] { spark.table("v").collect() }
+      val df = spark.table(t)
 
-      // recreate view with updated schema
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"ALTER TABLE $t ADD COLUMN bonus INT")
+      sql(s"INSERT INTO $t VALUES (2, 200, 50)")
 
-      // now it should work with new schema
-      val df = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
-      df.write.insertInto(t)
-      checkAnswer(spark.table("v"), df)
+      checkAnswer(df, Seq(Row(1, 100), Row(2, 200)))
     }
   }
 
+  // Column ID tests: Complex types
 
-  test("SPARK-53924: temp view on DSv2 table with read options") {
+  test("drop+re-add array column rejects stale DataFrame") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
-
-      // create temp view with options
-      val df = spark.read.option("fakeOption", "testValue").table(t)
-      df.createOrReplaceTempView("v")
-
-      // verify options are preserved in the view
-      val options = spark.table("v").queryExecution.analyzed.collectFirst {
-        case d: DataSourceV2Relation => d.options
-      }.get
-      assert(options.get("fakeOption") == "testValue")
-
-      // add top-level column to underlying table
-      sql(s"ALTER TABLE $t ADD COLUMN age int")
+      sql(s"CREATE TABLE $t (id INT, tags ARRAY<STRING>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, array('a', 'b'))")
+      val df = spark.table(t)
 
-      // accessing temp view should succeed as top-level column additions are allowed
+      sql(s"ALTER TABLE $t DROP COLUMN tags")
+      sql(s"ALTER TABLE $t ADD COLUMN tags ARRAY<STRING>")
 
-      checkAnswer(spark.table("v"), Seq.empty)
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table created using SQL with plan and top-level additions") {
+  test("drop+re-add map column rejects stale DataFrame") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      withSQLConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") {
-        sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+      sql(s"CREATE TABLE $t (id INT, props MAP<STRING, INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, map('x', 1))")
+      val df = spark.table(t)
 
-        // create temp view using SQL that should capture plan
-        sql(s"CREATE OR REPLACE TEMPORARY VIEW v AS SELECT * FROM $t")
-        checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"ALTER TABLE $t DROP COLUMN props")
+      sql(s"ALTER TABLE $t ADD COLUMN props MAP<STRING, INT>")
 
-        // verify that view stores analyzed plan
-        val Some(view) = spark.sessionState.catalog.getRawTempView("v")
-        assert(view.plan.isDefined)
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
 
-        // add top-level column to underlying table
-        sql(s"ALTER TABLE $t ADD COLUMN age int")
+  test("drop+re-add nested struct field rejects stale DataFrame") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
 
-        // accessing temp view should succeed as top-level column additions are allowed
-        checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"ALTER TABLE $t DROP COLUMN person.age")
+      sql(s"ALTER TABLE $t ADD COLUMN person.age INT")
 
-        // insert data to verify view still works correctly
-        sql(s"INSERT INTO $t VALUES (1, 'a', 25)")
-        checkAnswer(spark.table("v"), Seq(Row(1, "a")))
-      }
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table detects VARCHAR/CHAR type changes") {
+  // The standard InMemoryTableCatalog preserves column IDs across type
+  // changes. Adding a nested field keeps
+  // the same column ID but changes the struct type. Column ID check passes,
+  // and since the query uses ALLOW_NEW_FIELDS mode (reads allow new fields),
+  // adding a nested struct field is permitted.
+  test("same column ID but expanded struct type: read tolerates nested field addition") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, name VARCHAR(10)) USING foo")
-
-      // create temp view
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice'))")
+      val df = spark.table(t)
 
-      // change VARCHAR(10) to VARCHAR(20)
-      sql(s"ALTER TABLE $t ALTER COLUMN name TYPE VARCHAR(20)")
+      // add nested field; the standard catalog preserves
+      // the person column ID despite the type change
+      sql(s"ALTER TABLE $t ADD COLUMN person.age INT")
 
-      // accessing temp view should detect type change
-      checkError(
-        exception = intercept[AnalysisException] { spark.table("v").collect() },
-        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
-        parameters = Map(
-          "viewName" -> "`v`",
-          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
-          "colType" -> "data",
-          "errors" -> "- `name` type has changed from VARCHAR(10) to VARCHAR(20)"))
+      // Read queries use ALLOW_NEW_FIELDS mode, so adding nested fields is
+      // permitted. The stale DataFrame reads the original columns successfully.
+      checkAnswer(df, Seq(Row(1, Row("Alice"))))
     }
   }
 
-  test("SPARK-53924: temp view on DSv2 table works after inserting data") {
-    val t = "testcat.ns1.ns2.tbl"
+  test("add field to array element struct rejects stale DataFrame") {
+    val t = "resetidcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+      sql(s"CREATE TABLE $t (id INT, items ARRAY<STRUCT<name: STRING>>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, array(named_struct('name', 'x')))")
+      val df = spark.table(t)
 
-      // create temp view
-      spark.table(t).createOrReplaceTempView("v")
-      checkAnswer(spark.table("v"), Seq.empty)
+      sql(s"ALTER TABLE $t ADD COLUMN items.element.price INT")
 
-      // insert data into underlying table (no schema change)
-      val df = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
-      df.write.insertInto(t)
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
 
-      // accessing temp view should work and reflect new data
-      checkAnswer(spark.table("v"), df)
+  test("add field to map value struct rejects stale DataFrame") {
+    val t = "resetidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, props MAP<STRING, STRUCT<v: INT>>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, map('a', named_struct('v', 1)))")
+      val df = spark.table(t)
 
-      // insert more data
-      val df2 = Seq((3L, "c"), (4L, "d")).toDF("id", "data")
-      df2.write.insertInto(t)
+      sql(s"ALTER TABLE $t ADD COLUMN props.value.label STRING")
 
-      // view should reflect all data
-      checkAnswer(spark.table("v"), df.union(df2))
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
     }
   }
 
-  test("cached DSv2 table DataFrame is refreshed and reused after insert") {
+  test("inserting new data into array column does not trigger column ID mismatch") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
-      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
-      val df1 = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
-      df1.write.insertInto(t)
-
-      // cache DataFrame pointing to table
-      val readDF1 = spark.table(t)
-      readDF1.cache()
-      assertCached(readDF1)
-      checkAnswer(readDF1, Seq(Row(1L, "a"), Row(2L, "b")))
+      sql(s"CREATE TABLE $t (id INT, tags ARRAY<STRING>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, array('a', 'b'))")
+      val df = spark.table(t)
 
-      // insert more data, invalidating and refreshing cache entry
-      val df2 = Seq((3L, "c"), (4L, "d")).toDF("id", "data")
-      df2.write.insertInto(t)
+      sql(s"INSERT INTO $t VALUES (2, array('c', 'd', 'e'))")
 
-      // verify underlying plan is recached and picks up new data
-      val readDF2 = spark.table(t)
-      assertCached(readDF2)
-      checkAnswer(readDF2, Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, "d")))
+      checkAnswer(df, Seq(
+        Row(1, Seq("a", "b")),
+        Row(2, Seq("c", "d", "e"))))
     }
   }
 
-  test("SPARK-54022: caching table via Dataset API should pin table state") {
+  test("inserting new data into map column does not trigger column ID mismatch") {
     val t = "testcat.ns1.ns2.tbl"
-    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
     withTable(t) {
-      sql(s"CREATE TABLE $t (id INT, value INT, category STRING) USING foo")
-      sql(s"INSERT INTO $t VALUES (1, 10, 'A'), (2, 20, 'B'), (3, 30, 'A')")
+      sql(s"CREATE TABLE $t (id INT, props MAP<STRING, INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, map('x', 1))")
+      val df = spark.table(t)
+
+      sql(s"INSERT INTO $t VALUES (2, map('y', 2, 'z', 3))")
+
+      checkAnswer(df, Seq(
+        Row(1, Map("x" -> 1)),
+        Row(2, Map("y" -> 2, "z" -> 3))))
+    }
+  }
+
+  test("inserting new data into struct column does not trigger column ID mismatch") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      sql(s"INSERT INTO $t VALUES (2, named_struct('name', 'Bob', 'age', 25))")
+
+      checkAnswer(df, Seq(
+        Row(1, Row("Alice", 30)),
+        Row(2, Row("Bob", 25))))
+    }
+  }
+
+  // Column ID tests: Composed nested IDs
+  //
+  // ComposedColumnIdTableCatalog encodes nested field IDs into the
+  // top-level Column.id() string, modeling the recommended adoption
+  // pattern for connectors with nested IDs. Any nested
+  // change produces a different encoded string, so validateColumnIds
+  // detects it even though Spark only compares top-level strings.
+
+  test("composed nested IDs detect drop+re-add of nested field") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN person.age")
+      sql(s"ALTER TABLE $t ADD COLUMN person.age INT")
+
+      // The inner age field got a new nested ID on re-add. The composed
+      // top-level string changes, so COLUMN_ID_MISMATCH fires.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("composed nested IDs tolerate same data inserted into nested column") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      // pure data insert, no schema change: composed IDs stay the same
+      sql(s"INSERT INTO $t VALUES (2, named_struct('name', 'Bob', 'age', 25))")
+
+      checkAnswer(df, Seq(
+        Row(1, Row("Alice", 30)),
+        Row(2, Row("Bob", 25))))
+    }
+  }
+
+  // Column ID tests: Additional nested coverage
+  //
+  // These tests fill specific nested cells that are not covered by the
+  // coarse (testcat) or composed (composedidcat) groups above.
+
+  // Nested type change with preserved top-level ID: the standard catalog
+  // preserves the parent ID, so schema validation catches the incompatible
+  // nested type change.
+  test("nested type change with preserved ID caught by schema validation") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t ALTER COLUMN person.age TYPE LONG")
+
+      // Top-level person ID is preserved (standard catalog behavior).
+      // Schema validation catches the nested type mismatch.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Depth >= 3 nesting with composed IDs: drop+re-add at depth 3 produces
+  // a different composed ID at the top level.
+  test("depth 3 nesting with composed IDs detects deep field change") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, a STRUCT<b: STRUCT<c: INT>>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('b', named_struct('c', 42)))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN a.b.c")
+      sql(s"ALTER TABLE $t ADD COLUMN a.b.c INT")
+
+      // The deep nested field c got a new ID on re-add, changing the
+      // composed top-level ID for column a.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Nested rename changes the parent struct type (different field name),
+  // so schema validation catches it. The top-level ID is preserved
+  // because the standard catalog matches by column name.
+  test("non-composed catalog: nested rename caught by " +
+      "data columns validation") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t RENAME COLUMN person.name TO first_name")
+
+      // Top-level person ID is preserved. The struct type changed
+      // (field name differs), so schema validation fires.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Top-level ID preservation across nested changes
+  //
+  // The standard InMemoryTableCatalog preserves top-level column IDs when
+  // nested fields are added or dropped (assignMissingIds matches by name
+  // only). These tests verify that behavior using the catalog API.
+
+  // Column ID tests: Composed IDs for container types (arrays, maps)
+  //
+  // ComposedColumnIdTableCatalog encodes nested field IDs into the
+  // top-level string. These tests verify detection of nested drop+re-add
+  // inside array element structs and map value structs.
+
+  test("composed nested IDs detect drop+re-add in array element struct") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, items ARRAY<STRUCT<name: STRING, price: INT>>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, array(named_struct('name', 'x', 'price', 10)))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN items.element.price")
+      sql(s"ALTER TABLE $t ADD COLUMN items.element.price INT")
+
+      // The nested price field got a new ID on re-add. The composed
+      // top-level ID for items changes, so COLUMN_ID_MISMATCH fires.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("composed nested IDs detect drop+re-add in map value struct") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, props MAP<STRING, STRUCT<x: INT, y: INT>>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, map('k1', named_struct('x', 10, 'y', 20)))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN props.value.y")
+      sql(s"ALTER TABLE $t ADD COLUMN props.value.y INT")
+
+      // The nested y field got a new ID on re-add. The composed
+      // top-level ID for props changes, so COLUMN_ID_MISMATCH fires.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("composed nested IDs detect rename within struct") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t RENAME COLUMN person.name TO first_name")
+
+      // With position-based keys, the renamed field stays at position 0
+      // and keeps its nested ID. The composed string is unchanged, so
+      // schema validation catches the struct type difference instead.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("composed nested IDs: reorder preserves composed column ID") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+
+      val cat = catalog("composedidcat")
+      val personBefore = cat.loadTable(ident).columns().find(_.name() == "person").get
+      val idBefore = personBefore.id()
+      val typeBefore = personBefore.dataType()
+
+      // REPLACE COLUMNS does not support nested fields, so use ALTER
+      // COLUMN ... FIRST to reorder within the struct.
+      sql(s"ALTER TABLE $t ALTER COLUMN person.age FIRST")
+
+      val personAfter = cat.loadTable(ident).columns().find(_.name() == "person").get
+      val idAfter = personAfter.id()
+      val typeAfter = personAfter.dataType()
+
+      // Confirm the schema actually changed (age moved to first position).
+      assert(typeBefore != typeAfter,
+        "Struct field order should have changed after ALTER COLUMN ... FIRST")
+      assert(typeAfter.toString.startsWith("StructType(StructField(age"),
+        s"age should be first field after reorder, got: $typeAfter")
+
+      // Position-based keys: each ordinal position keeps its old ID after
+      // reorder, so the composed string is unchanged despite the schema change.
+      assert(idBefore == idAfter,
+        s"Composed ID should be unchanged after reorder: $idBefore vs $idAfter")
+    }
+  }
+
+  test("composed nested IDs tolerate nested field reorder end-to-end") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, person STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t ALTER COLUMN person.age FIRST")
+
+      // InMemoryTable does not actually reorder nested struct fields in stored
+      // data, so the read still returns the original field order. This is fine
+      // because the purpose of this test is to verify that the column ID check
+      // passes (no COLUMN_ID_MISMATCH) after a nested field reorder.
+      checkAnswer(df, Seq(Row(1, Row("Alice", 30))))
+    }
+  }
+
+  test("composed nested IDs detect drop+re-add in map key struct") {
+    val t = "composedidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t " +
+        s"(id INT, coords MAP<STRUCT<x: INT, y: INT>, STRING>) USING foo")
+      sql(s"INSERT INTO $t VALUES " +
+        s"(1, map(named_struct('x', 1, 'y', 2), 'origin'))")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN coords.key.y")
+      sql(s"ALTER TABLE $t ADD COLUMN coords.key.y INT")
+
+      // The nested y field in the map key struct got a new ID on re-add.
+      // The composed top-level ID for coords changes.
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("non-composed catalog: top-level ID preserved when nested field dropped") {
+    val t = "testcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, info STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+
+      val cat = catalog("testcat")
+      val colsBefore = cat.loadTable(ident).columns()
+      val idColId = colsBefore.find(_.name() == "id").get.id()
+      val infoColId = colsBefore.find(_.name() == "info").get.id()
+
+      sql(s"ALTER TABLE $t DROP COLUMN info.name")
+
+      val colsAfter = cat.loadTable(ident).columns()
+      // Parent struct column ID is preserved after nested field drop
+      assert(colsAfter.find(_.name() == "info").get.id() == infoColId)
+      // Sibling column ID is preserved
+      assert(colsAfter.find(_.name() == "id").get.id() == idColId)
+
+      // Data verification: age is intact after dropping name
+      checkAnswer(
+        sql(s"SELECT id, info.age FROM $t"),
+        Seq(Row(1, 30)))
+    }
+  }
+
+  test("non-composed catalog: top-level ID preserved when nested field added") {
+    val t = "testcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, info STRUCT<name: STRING, age: INT>) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, named_struct('name', 'Alice', 'age', 30))")
+
+      val cat = catalog("testcat")
+      val colsBefore = cat.loadTable(ident).columns()
+      val idColId = colsBefore.find(_.name() == "id").get.id()
+      val infoColId = colsBefore.find(_.name() == "info").get.id()
+
+      sql(s"ALTER TABLE $t ADD COLUMN info.email STRING")
+
+      val colsAfter = cat.loadTable(ident).columns()
+      // Parent struct column ID is preserved after nested field addition
+      assert(colsAfter.find(_.name() == "info").get.id() == infoColId)
+      // Sibling column ID is preserved
+      assert(colsAfter.find(_.name() == "id").get.id() == idColId)
+
+      // Old row has NULL for new field, new row has value
+      sql(s"INSERT INTO $t VALUES (2, named_struct('name', 'Bob', 'age', 25, 'email', 'bob@test'))")
+      checkAnswer(
+        sql(s"SELECT id, info.name, info.age, info.email FROM $t ORDER BY id"),
+        Seq(Row(1, "Alice", 30, null), Row(2, "Bob", 25, "bob@test")))
+    }
+  }
+
+  // The standard InMemoryTableCatalog preserves column IDs across type
+  // widening (e.g., INT -> LONG). The
+  // column ID check passes but schema validation catches the type mismatch.
+  test("same column ID but widened type caught by schema validation") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t ALTER COLUMN salary TYPE LONG")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Join detection
+
+  test("join rejects stale DataFrame after drop+re-add column") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df1 = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+      sql(s"INSERT INTO $t VALUES (1, 999)")
+
+      val df2 = spark.table(t)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          df1.join(df2, df1("id") === df2("id")).collect()
+        },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: DataFrame operation types
+  //
+  // The refresh logic validates column IDs for every DataSourceV2Relation
+  // in the plan, regardless of the operation applied on top (filter,
+  // aggregation, sort, projection).
+
+  test("filter on stale DataFrame after drop+re-add column rejects") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100), (2, 200)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.filter("salary > 50").collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("aggregate on stale DataFrame after drop+re-add column rejects") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100), (2, 200)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          df.groupBy("id").agg(sum("salary")).collect()
+        },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("sort on stale DataFrame after drop+re-add column rejects") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100), (2, 200)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.orderBy("salary").collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("select specific column from stale DataFrame after drop+re-add rejects") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.select("salary").collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Subquery
+  //
+  // The refresh logic uses transformWithSubqueries, so column IDs in
+  // subquery plans are also validated.
+
+  test("subquery referencing stale table detects column ID mismatch") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100), (2, 200)")
+
+      val df = spark.sql(s"SELECT * FROM $t WHERE id IN (SELECT id FROM $t)")
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Rename column interaction
+  //
+  // When a column is renamed, the old column name no longer exists in
+  // the current table. The column ID check skips it (not found by name),
+  // and the schema validation catches it as a missing column.
+
+  test("rename column triggers schema mismatch not column ID mismatch") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t RENAME COLUMN salary TO wage")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Sequential schema changes
+  //
+  // Multiple schema changes between analysis and execution should still
+  // be correctly detected by the column ID validation.
+
+  test("double drop+re-add detects column ID mismatch") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df = spark.table(t)
+
+      // first drop+re-add
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+      // second drop+re-add
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("drop+re-add one column while adding another does not affect unchanged columns") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+      sql(s"ALTER TABLE $t ADD COLUMN bonus INT")
+
+      // stale DataFrame detects salary ID mismatch
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*salary.*"))
+
+      // a fresh DataFrame succeeds (all current column IDs are consistent)
+      sql(s"INSERT INTO $t VALUES (2, 200, 50)")
+      // After DROP + re-ADD, InMemoryTable loses the old salary data for
+      // existing rows, so row 1 reads null for both re-added and new columns.
+      checkAnswer(spark.table(t), Seq(Row(1, null, null), Row(2, 200, 50)))
+    }
+  }
+
+  // Column ID tests: Type change in a catalog that resets IDs on type changes
+  //
+  // TypeChangeResetsColIdTableCatalog assigns new column IDs when the
+  // data type changes. This is the inverse of the standard InMemoryTableCatalog
+  // which preserves IDs across type changes.
+
+  test("type widening in reset-id catalog triggers column ID mismatch") {
+    val t = "resetidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t ALTER COLUMN salary TYPE LONG")
+
+      // reset-id catalog assigns a new ID for the widened column
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Assignment verification
+  //
+  // Verifies that the InMemoryTableCatalog assigns unique, incrementing
+  // column IDs across schema changes.
+
+  test("column IDs are unique and incrementing across schema changes") {
+    val t = "testcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+
+      val cat = catalog("testcat")
+      val initialCols = cat.loadTable(ident).columns()
+      val idColId = initialCols.find(_.name() == "id").get.id()
+      val salaryColId = initialCols.find(_.name() == "salary").get.id()
+      assert(idColId != null && salaryColId != null, "initial columns should have IDs")
+      assert(idColId != salaryColId, "initial column IDs should be unique")
+
+      // add a column: new ID should be different from existing ones
+      sql(s"ALTER TABLE $t ADD COLUMN bonus INT")
+      val afterAddCols = cat.loadTable(ident).columns()
+      val bonusColId = afterAddCols.find(_.name() == "bonus").get.id()
+      assert(bonusColId != null, "added column should have an ID")
+      assert(bonusColId != idColId && bonusColId != salaryColId,
+        "added column ID should differ from existing IDs")
+      // existing column IDs should be preserved
+      assert(afterAddCols.find(_.name() == "id").get.id() == idColId)
+      assert(afterAddCols.find(_.name() == "salary").get.id() == salaryColId)
+
+      // drop+re-add: new ID should differ from all previous IDs
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+      val afterReaddCols = cat.loadTable(ident).columns()
+      val newSalaryColId = afterReaddCols.find(_.name() == "salary").get.id()
+      assert(newSalaryColId != salaryColId,
+        "re-added column should have a different ID than original")
+      assert(newSalaryColId != idColId && newSalaryColId != bonusColId,
+        "re-added column ID should be unique")
+      // unchanged column IDs should still be preserved
+      assert(afterReaddCols.find(_.name() == "id").get.id() == idColId)
+      assert(afterReaddCols.find(_.name() == "bonus").get.id() == bonusColId)
+    }
+  }
+
+  // Column ID tests: Temp view behavior
+  //
+  // SQL views do not capture column IDs. They resolve columns by name
+  // on each access, so column ID changes are invisible to them.
+
+  test("temp view tolerates drop+re-add column with same type") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      spark.table(t).createOrReplaceTempView("tmp_view")
+      checkAnswer(spark.sql("SELECT * FROM tmp_view"), Seq(Row(1, 100)))
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+      sql(s"INSERT INTO $t VALUES (2, 200)")
+
+      checkAnswer(
+        spark.sql("SELECT * FROM tmp_view"),
+        Seq(Row(1, null), Row(2, 200)))
+    }
+  }
+
+  test("temp view rejects drop+re-add column with different type") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+      spark.table(t).createOrReplaceTempView("tmp")
+      checkAnswer(sql("SELECT * FROM tmp"), Seq(Row(1, 100)))
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary STRING")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("SELECT * FROM tmp").collect()
+        },
+        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+        matchPVals = true,
+        parameters = Map(
+          "viewName" -> ".*", "tableName" -> ".*",
+          "colType" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  // Column ID tests: Write operations
+  //
+  // [[writeTo().append()]] eagerly executes the command during the
+  // [[commandExecuted]] phase, before the refresh phase runs. As a result,
+  // column ID validation does not apply to the source DataFrame in a
+  // [[writeTo]] path. The append succeeds without throwing a
+  // COLUMN_ID_MISMATCH error.
+  test("writeTo().append() does not throw column ID mismatch after drop+re-add column") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+      val source = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      // Command is eagerly executed before the refresh phase validates
+      // column IDs. No COLUMN_ID_MISMATCH exception is thrown.
+      source.writeTo(t).append()
+    }
+  }
+
+  test("insertInto rejects stale source after drop+re-add column") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+      val source = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          source.write.format(v2Format).insertInto(t)
+        },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+    }
+  }
+
+  // Column ID tests: Null table ID connector
+
+  // When a connector does not support table IDs but does support column IDs,
+  // column ID validation still catches drop+recreate.
+  test("null table ID: column IDs still detect drop+recreate") {
+    val t = "nullidcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val cat = catalog("nullidcat")
+      assert(cat.loadTable(ident).id == null,
+        "NullTableIdInMemoryTableCatalog should produce null table IDs")
+
+      val df = spark.table(t)
+
+      sql(s"DROP TABLE $t")
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+    }
+  }
+
+  // Column ID tests: Null column ID connector
+
+  // When a connector does not support column IDs, validation is skipped, but version
+  // tracking still detects the schema change and refreshes the table reference.
+  test("connector with null column IDs: drop+re-add column not detected") {
+    val t = "nullcolidcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      assert(catalog("nullcolidcat").loadTable(ident).columns().forall(_.id() == null),
+        "NullColumnIdInMemoryTableCatalog should produce null column IDs")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      // No column ID error because IDs are null. The table version changed, so
+      // [[V2TableRefreshUtil]] reloads it and the re-added salary column has null values.
+      checkAnswer(df, Seq(Row(1, null)))
+    }
+  }
+
+  test("connector with null column IDs: stale DataFrame reads after column addition " +
+      "without ID mismatch") {
+    val t = "nullcolidcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      val df = spark.table(t)
+
+      sql(s"ALTER TABLE $t ADD COLUMN bonus INT")
+
+      // The stale DataFrame has only [id, salary] while the table now has
+      // [id, salary, bonus]. Since column IDs are null, no COLUMN_ID_MISMATCH
+      // error is thrown; new columns are tolerated for read queries.
+      checkAnswer(df, Seq(Row(1, 100)))
+    }
+  }
+
+  // Column ID tests: Mixed null/non-null column IDs
+
+  test("mixed column IDs: original non-null ID, current null ID after drop+re-add not detected") {
+    val t = "mixedcolidcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      MixedColumnIdTableCatalog.reset()
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      // salary has a non-null column ID at analysis time
+      val salaryCol = catalog("mixedcolidcat").loadTable(ident).columns()
+        .find(_.name() == "salary").get
+      assert(salaryCol.id() != null, "salary should have a non-null ID initially")
+
+      val df = spark.table(t)
+
+      // make salary return null ID from now on
+      MixedColumnIdTableCatalog.nullIdColumnNames.add("salary")
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      val newSalaryCol = catalog("mixedcolidcat").loadTable(ident).columns()
+        .find(_.name() == "salary").get
+      assert(newSalaryCol.id() == null, "salary should have a null ID after re-add")
+
+      // No column ID error because current ID is null. The table is refreshed via
+      // version tracking, so the re-added salary column has null values.
+      checkAnswer(df, Seq(Row(1, null)))
+    }
+  }
+
+  test("mixed column IDs: original null ID, current non-null ID after drop+re-add not detected") {
+    val t = "mixedcolidcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      MixedColumnIdTableCatalog.reset()
+      MixedColumnIdTableCatalog.nullIdColumnNames.add("salary")
+      sql(s"CREATE TABLE $t (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 100)")
+
+      // salary has a null column ID at analysis time
+      val salaryCol = catalog("mixedcolidcat").loadTable(ident).columns()
+        .find(_.name() == "salary").get
+      assert(salaryCol.id() == null, "salary should have a null ID initially")
+
+      val df = spark.table(t)
+
+      // make salary return non-null ID from now on
+      MixedColumnIdTableCatalog.nullIdColumnNames.remove("salary")
+      sql(s"ALTER TABLE $t DROP COLUMN salary")
+      sql(s"ALTER TABLE $t ADD COLUMN salary INT")
+
+      val newSalaryCol = catalog("mixedcolidcat").loadTable(ident).columns()
+        .find(_.name() == "salary").get
+      assert(newSalaryCol.id() != null, "salary should have a non-null ID after re-add")
+
+      // No column ID error because original ID is null. The table is refreshed via
+      // version tracking, so the re-added salary column has null values.
+      checkAnswer(df, Seq(Row(1, null)))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table allows top-level column additions") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+
+      // create temp view using DataFrame API
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // add top-level column to underlying table
+      sql(s"ALTER TABLE $t ADD COLUMN age int")
+
+      // accessing temp view should succeed as top-level column additions are allowed
+      // view captures original columns
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // insert data to verify view still works correctly
+      sql(s"INSERT INTO $t VALUES (1, 'a', 25)")
+      checkAnswer(spark.table("v"), Seq(Row(1, "a")))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table detects nested column additions") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, address STRUCT<street: STRING, city: STRING>) USING foo")
+
+      // create temp view using DataFrame API
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "address"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // add nested column to underlying table
+      sql(s"ALTER TABLE $t ADD COLUMN address.zipCode string")
+
+      // accessing temp view should detect schema change for nested additions
+      checkError(
+        exception = intercept[AnalysisException] { spark.table("v").collect() },
+        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+        parameters = Map(
+          "viewName" -> "`v`",
+          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+          "colType" -> "data",
+          "errors" -> "- `address`.`zipCode` STRING has been added"))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table detects removed columns") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string, age int) USING foo")
+
+      // create temp view
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data", "age"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // drop column from underlying table
+      sql(s"ALTER TABLE $t DROP COLUMN age")
+
+      // accessing temp view should detect schema change
+      checkError(
+        exception = intercept[AnalysisException] { spark.table("v").collect() },
+        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+        parameters = Map(
+          "viewName" -> "`v`",
+          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+          "colType" -> "data",
+          "errors" -> "- `age` INT has been removed"))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table detects nested column removal") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, address STRUCT<street: STRING, city: STRING>) USING foo")
+
+      // create temp view using DataFrame API
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "address"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // drop nested column from underlying table
+      sql(s"ALTER TABLE $t DROP COLUMN address.city")
+
+      // accessing temp view should detect schema change for nested removals
+      checkError(
+        exception = intercept[AnalysisException] { spark.table("v").collect() },
+        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+        parameters = Map(
+          "viewName" -> "`v`",
+          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+          "colType" -> "data",
+          "errors" -> "- `address`.`city` STRING has been removed"))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table detects nullability changes") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string NOT NULL) USING foo")
+
+      // create temp view
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // change nullability constraint using ALTER TABLE
+      sql(s"ALTER TABLE $t ALTER COLUMN data DROP NOT NULL")
+
+      // accessing temp view should detect schema change
+      checkError(
+        exception = intercept[AnalysisException] { spark.table("v").collect() },
+        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+        parameters = Map(
+          "viewName" -> "`v`",
+          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+          "colType" -> "data",
+          "errors" -> "- `data` is nullable now"))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table accepts table ID changes") {
+    val t = "testcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+      val df = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
+      df.write.insertInto(t)
+
+      // create temp view
+      spark.table(t).createOrReplaceTempView("v")
+      checkAnswer(spark.table("v"), df)
+
+      // capture the original table ID
+      val originalTableId = catalog("testcat").loadTable(ident).id
+
+      // drop and recreate table (this changes the table ID)
+      sql(s"DROP TABLE $t")
+      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+
+      // verify table ID changed
+      val newTableId = catalog("testcat").loadTable(ident).id
+      assert(originalTableId != newTableId)
+
+      // accessing temp view should work despite table ID change (returns empty data)
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // insert new data and verify view reflects it
+      val newDF = Seq((3L, "c"), (4L, "d")).toDF("id", "data")
+      newDF.write.insertInto(t)
+      checkAnswer(spark.table("v"), newDF)
+    }
+  }
+
+  test("SPARK-53924: createOrReplaceTempView works after schema change") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data STRING, extra INT) USING foo")
+
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data", "extra"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // alter table
+      sql(s"ALTER TABLE $t DROP COLUMN extra")
+
+      // old view fails
+      intercept[AnalysisException] { spark.table("v").collect() }
+
+      // recreate view with updated schema
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // now it should work with new schema
+      val df = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
+      df.write.insertInto(t)
+      checkAnswer(spark.table("v"), df)
+    }
+  }
+
+
+  test("SPARK-53924: temp view on DSv2 table with read options") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+
+      // create temp view with options
+      val df = spark.read.option("fakeOption", "testValue").table(t)
+      df.createOrReplaceTempView("v")
+
+      // verify options are preserved in the view
+      val options = spark.table("v").queryExecution.analyzed.collectFirst {
+        case d: DataSourceV2Relation => d.options
+      }.get
+      assert(options.get("fakeOption") == "testValue")
+
+      // add top-level column to underlying table
+      sql(s"ALTER TABLE $t ADD COLUMN age int")
+
+      // accessing temp view should succeed as top-level column additions are allowed
+
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table created using SQL with plan and top-level additions") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      withSQLConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") {
+        sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+
+        // create temp view using SQL that should capture plan
+        sql(s"CREATE OR REPLACE TEMPORARY VIEW v AS SELECT * FROM $t")
+        assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+        checkAnswer(spark.table("v"), Seq.empty)
+
+        // verify that view stores analyzed plan
+        val Some(view) = spark.sessionState.catalog.getRawTempView("v")
+        assert(view.plan.isDefined)
+
+        // add top-level column to underlying table
+        sql(s"ALTER TABLE $t ADD COLUMN age int")
+
+        // accessing temp view should succeed as top-level column additions are allowed
+        assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+        checkAnswer(spark.table("v"), Seq.empty)
+
+        // insert data to verify view still works correctly
+        sql(s"INSERT INTO $t VALUES (1, 'a', 25)")
+        checkAnswer(spark.table("v"), Seq(Row(1, "a")))
+      }
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table detects VARCHAR/CHAR type changes") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, name VARCHAR(10)) USING foo")
+
+      // create temp view
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "name"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // change VARCHAR(10) to VARCHAR(20)
+      sql(s"ALTER TABLE $t ALTER COLUMN name TYPE VARCHAR(20)")
+
+      // accessing temp view should detect type change
+      checkError(
+        exception = intercept[AnalysisException] { spark.table("v").collect() },
+        condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION",
+        parameters = Map(
+          "viewName" -> "`v`",
+          "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`",
+          "colType" -> "data",
+          "errors" -> "- `name` type has changed from VARCHAR(10) to VARCHAR(20)"))
+    }
+  }
+
+  test("SPARK-53924: temp view on DSv2 table works after inserting data") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+
+      // create temp view
+      spark.table(t).createOrReplaceTempView("v")
+      assert(spark.table("v").schema.fieldNames.toSeq == Seq("id", "data"))
+      checkAnswer(spark.table("v"), Seq.empty)
+
+      // insert data into underlying table (no schema change)
+      val df = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
+      df.write.insertInto(t)
+
+      // accessing temp view should work and reflect new data
+      checkAnswer(spark.table("v"), df)
+
+      // insert more data
+      val df2 = Seq((3L, "c"), (4L, "d")).toDF("id", "data")
+      df2.write.insertInto(t)
+
+      // view should reflect all data
+      checkAnswer(spark.table("v"), df.union(df2))
+    }
+  }
+
+  test("cached DSv2 table DataFrame is refreshed and reused after insert") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint, data string) USING foo")
+      val df1 = Seq((1L, "a"), (2L, "b")).toDF("id", "data")
+      df1.write.insertInto(t)
+
+      // cache DataFrame pointing to table
+      val readDF1 = spark.table(t)
+      readDF1.cache()
+      assertCached(readDF1)
+      checkAnswer(readDF1, Seq(Row(1L, "a"), Row(2L, "b")))
+
+      // insert more data, invalidating and refreshing cache entry
+      val df2 = Seq((3L, "c"), (4L, "d")).toDF("id", "data")
+      df2.write.insertInto(t)
+
+      // verify underlying plan is recached and picks up new data
+      val readDF2 = spark.table(t)
+      assertCached(readDF2)
+      checkAnswer(readDF2, Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, "d")))
+    }
+  }
+
+  test("SPARK-54022: caching table via Dataset API should pin table state") {
+    val t = "testcat.ns1.ns2.tbl"
+    val ident = Identifier.of(Array("ns1", "ns2"), "tbl")
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id INT, value INT, category STRING) USING foo")
+      sql(s"INSERT INTO $t VALUES (1, 10, 'A'), (2, 20, 'B'), (3, 30, 'A')")
 
       // cache table
       spark.table(t).cache()
@@ -1843,6 +3114,7 @@ class DataSourceV2DataFrameSuite
 
       // verify external changes are reflected correctly when table is queried
       assertNotCached(spark.table(t))
+      assert(spark.table(t).schema.fieldNames.toSeq == Seq("id", "value", "category"))
       checkAnswer(spark.table(t), Seq.empty)
     }
   }
@@ -2178,4 +3450,144 @@ class DataSourceV2DataFrameSuite
       spark.listenerManager.unregister(listener)
     }
   }
+
+  test("withSchemaEvolution: saveAsTable append evolves the table schema to add a new column") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint) USING $v2Format")
+      val df = Seq((1L, "a")).toDF("id", "data")
+
+      df.write.mode("append").format(v2Format).withSchemaEvolution().saveAsTable(t)
+
+      assert(spark.table(t).schema ===
+        new StructType().add("id", LongType).add("data", StringType))
+      checkAnswer(spark.table(t), Seq(Row(1L, "a")))
+    }
+  }
+
+  test("withSchemaEvolution: insertInto evolves the table schema to add a new column") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint) USING $v2Format")
+      val df = Seq((1L, "a")).toDF("id", "data")
+
+      df.write.format(v2Format).withSchemaEvolution().insertInto(t)
+
+      assert(spark.table(t).schema ===
+        new StructType().add("id", LongType).add("data", StringType))
+      checkAnswer(spark.table(t), Seq(Row(1L, "a")))
+    }
+  }
+
+  test("withSchemaEvolution: insertInto Overwrite evolves the table schema") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint) USING $v2Format")
+      val df = Seq((1L, "a")).toDF("id", "data")
+
+      df.write.mode("overwrite").format(v2Format).withSchemaEvolution().insertInto(t)
+
+      assert(spark.table(t).schema ===
+        new StructType().add("id", LongType).add("data", StringType))
+      checkAnswer(spark.table(t), Seq(Row(1L, "a")))
+    }
+  }
+
+  test("withSchemaEvolution: dynamic partition overwrite evolves the table schema") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t (id bigint) USING $v2Format PARTITIONED BY (id)")
+      withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key ->
+          SQLConf.PartitionOverwriteMode.DYNAMIC.toString) {
+        Seq((1L, "a")).toDF("id", "data")
+          .write.mode("overwrite").format(v2Format).withSchemaEvolution().insertInto(t)
+
+        assert(spark.table(t).schema ===
+          new org.apache.spark.sql.types.StructType()
+            .add("id", org.apache.spark.sql.types.LongType)
+            .add("data", StringType))
+        checkAnswer(spark.table(t), Seq(Row(1L, "a")))
+      }
+    }
+  }
+
+  test("withSchemaEvolution: saveAsTable Overwrite with existing table fails with REPLACE") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      sql(s"CREATE TABLE $t USING $v2Format AS SELECT 0L id, 'z' data")
+      checkError(
+        exception = intercept[AnalysisException] {
+          Seq((1L, "a")).toDF("id", "data")
+            .write.mode("overwrite").format(v2Format).withSchemaEvolution().saveAsTable(t)
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.REPLACE_TABLE",
+        parameters = Map.empty)
+    }
+  }
+
+  test("withSchemaEvolution: saveAsTable Overwrite with missing table fails with REPLACE") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      checkError(
+        exception = intercept[AnalysisException] {
+          Seq((1L, "a")).toDF("id", "data")
+            .write.mode("overwrite").format(v2Format).withSchemaEvolution().saveAsTable(t)
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.REPLACE_TABLE",
+        parameters = Map.empty)
+    }
+  }
+
+  test("withSchemaEvolution: saveAsTable ErrorIfExists/Ignore fails with CREATE_TABLE") {
+    val t = "testcat.ns1.ns2.tbl"
+    withTable(t) {
+      val df = Seq((1L, "a")).toDF("id", "data")
+      checkError(
+        exception = intercept[AnalysisException] {
+          df.write.format(v2Format).withSchemaEvolution().saveAsTable(t)
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.CREATE_TABLE",
+        parameters = Map.empty)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          df.write.mode("ignore").format(v2Format).withSchemaEvolution().saveAsTable(t)
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.CREATE_TABLE",
+        parameters = Map.empty)
+    }
+  }
+
+  test("withSchemaEvolution: save/saveAsTable/insertInto to a V1 source/table fail") {
+    withTempPath { p =>
+      val path = p.getCanonicalPath
+      // V1 file-based source (parquet) - no V2 batch write, falls back to V1.
+      checkError(
+        exception = intercept[AnalysisException] {
+          Seq((1L, "a")).toDF("id", "data")
+            .write.format("parquet").withSchemaEvolution().save(path)
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.V1_TABLE",
+        parameters = Map.empty)
+    }
+
+    withTable("v1_table") {
+      sql("CREATE TABLE v1_table (id bigint, data string) USING parquet")
+      val df = Seq((1L, "a")).toDF("id", "data")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          df.write.mode("append").withSchemaEvolution().saveAsTable("v1_table")
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.V1_TABLE",
+        parameters = Map.empty)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          df.write.withSchemaEvolution().insertInto("v1_table")
+        },
+        condition = "UNSUPPORTED_SCHEMA_EVOLUTION.V1_TABLE",
+        parameters = Map.empty)
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2EnhancedPartitionFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2EnhancedPartitionFilterSuite.scala
index 956a88406b0ff..3fe928daf9e03 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2EnhancedPartitionFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2EnhancedPartitionFilterSuite.scala
@@ -49,6 +49,8 @@ import org.apache.spark.sql.test.SharedSparkSession
  * 6. Untranslatable, Data Filter -> Post-Scan Filters
  * 7. Untranslatable, Partition Filter, 2nd Pass Returned -> Post-Scan Filters
  * 8. Untranslatable, Partition Filter, 2nd Pass Accepted -> Pushed Down
+ * 9. Translated, Partition Filter, 1st Pass Accepted AND Returned (partial pushdown) ->
+ *    Pushed Down in 1st pass, NOT re-derived in 2nd pass.
  */
 class DataSourceV2EnhancedPartitionFilterSuite
   extends SharedSparkSession with BeforeAndAfter with PredicateHelper {
@@ -220,6 +222,30 @@ class DataSourceV2EnhancedPartitionFilterSuite
     }
   }
 
+  test("case 9: partition filter pushed but returned in first pass is not re-pushed second pass") {
+    withTable(partFilterTableName) {
+      // The source accepts the partition predicate in the first pass (so it is reported by
+      // pushedPredicates and prunes partitions) but also returns it for post-scan evaluation,
+      // simulating a partial pushdown (e.g. a row group filter).
+      sql(s"CREATE TABLE $partFilterTableName (part_col string, data string) USING $v2Source " +
+        "PARTITIONED BY (part_col) " +
+        "TBLPROPERTIES('return-accepted-partition-predicates' = 'true')")
+      sql(s"INSERT INTO $partFilterTableName VALUES ('a', 'x'), ('b', 'y'), ('c', 'z')")
+
+      // Translated, Partition Filter; 1st Pass Accepted AND Returned.
+      // The second pass derives PartitionPredicates only from filters that were NOT already
+      // pushed (not in pushedPredicates). Since this filter was pushed in the first pass, it must
+      // NOT be pushed again as a PartitionPredicate in the second pass.
+      val df = sql(s"SELECT * FROM $partFilterTableName WHERE part_col = 'b'")
+      checkAnswer(df, Seq(Row("b", "y")))
+      assertPushedPartitionPredicates(df, 0)
+      assertScanReturnsPartitionKeys(df, Set("b"))
+      // The returned filter is still evaluated after the scan.
+      assert(df.queryExecution.executedPlan.exists(_.isInstanceOf[FilterExec]),
+        "Partition filter returned in first pass should remain as a post-scan Filter")
+    }
+  }
+
   test("nested identity partition: second-pass PartitionPredicate with UDF on nested key") {
     withTable(partFilterTableName) {
       sql(s"CREATE TABLE $partFilterTableName " +
@@ -257,6 +283,34 @@ class DataSourceV2EnhancedPartitionFilterSuite
     }
   }
 
+  test("nested identity partition: case 9 partition filter pushed but returned in first pass " +
+    "is not re-pushed second pass") {
+    withTable(partFilterTableName) {
+      // The source accepts the partition predicate in the first pass (so it is reported by
+      // pushedPredicates and prunes partitions) but also returns it for post-scan evaluation,
+      // simulating a partial pushdown (e.g. a row group filter).
+      sql(s"CREATE TABLE $partFilterTableName " +
+        s"(s struct<tz: string, x: int>, data string) USING $v2Source " +
+        "PARTITIONED BY (s.tz) " +
+        "TBLPROPERTIES('return-accepted-partition-predicates' = 'true')")
+      sql(s"INSERT INTO $partFilterTableName VALUES " +
+        "(named_struct('tz', 'LA', 'x', 1), 'a'), " +
+        "(named_struct('tz', 'NY', 'x', 2), 'b')")
+
+      // Translated, Partition Filter; 1st Pass Accepted AND Returned.
+      // The second pass derives PartitionPredicates only from filters that were NOT already
+      // pushed (not in pushedPredicates). Since this filter was pushed in the first pass, it must
+      // NOT be pushed again as a PartitionPredicate in the second pass.
+      val df = sql(s"SELECT * FROM $partFilterTableName WHERE s.tz = 'LA'")
+      checkAnswer(df, Seq(Row(Row("LA", 1), "a")))
+      assertPushedPartitionPredicates(df, 0)
+      assertScanReturnsPartitionKeys(df, Set("LA"))
+      // The returned filter is still evaluated after the scan.
+      assert(df.queryExecution.executedPlan.exists(_.isInstanceOf[FilterExec]),
+        "Partition filter returned in first pass should remain as a post-scan Filter")
+    }
+  }
+
   test("nested identity partition: field name containing a dot") {
     withTable(partFilterTableName) {
       sql(s"CREATE TABLE $partFilterTableName " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2ExtSessionColumnIdSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2ExtSessionColumnIdSuite.scala
new file mode 100644
index 0000000000000..ed46f33e7df01
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2ExtSessionColumnIdSuite.scala
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SparkSession}
+import org.apache.spark.sql.connector.catalog.SharedInMemoryTableCatalog
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Tests for cross-session column ID detection where an external
+ * [[SparkSession]] modifies a DSv2 table while another session holds
+ * a stale [[DataFrame]].
+ *
+ * Each [[SparkSession]] has its own [[CacheManager]], so session1 does
+ * not know about session2's schema changes. When session1 executes a
+ * stale [[DataFrame]], the refresh logic calls [[catalog.loadTable]] to
+ * get the latest table metadata from the catalog and compares the
+ * captured column IDs against the current ones.
+ *
+ * [[SharedInMemoryTableCatalog]] makes this work by storing tables in
+ * a static [[ConcurrentHashMap]] that all catalog instances share,
+ * regardless of which session created them.
+ */
+class DataSourceV2ExtSessionColumnIdSuite extends QueryTest with SharedSparkSession {
+
+  override protected def sparkConf: SparkConf = super.sparkConf
+    .set(SQLConf.ANSI_ENABLED, true)
+    .set("spark.sql.catalog.sharedcat",
+      classOf[SharedInMemoryTableCatalog].getName)
+    // copyOnLoad: each loadTable returns a fresh copy, simulating a real
+    // catalog where metadata is reloaded from the metastore on each access
+    .set("spark.sql.catalog.sharedcat.copyOnLoad", "true")
+
+  override def afterEach(): Unit = {
+    try {
+      SharedInMemoryTableCatalog.reset()
+      spark.sessionState.catalogManager.reset()
+    } finally {
+      super.afterEach()
+    }
+  }
+
+  /**
+   * Creates a second [[SparkSession]] with its own [[CacheManager]] but
+   * sharing the same [[SparkContext]] (and therefore the same catalog
+   * configs like `spark.sql.catalog.sharedcat`).
+   *
+   * We clear the active/default session first so that
+   * [[SparkSession.builder().create()]] allocates a brand new
+   * [[SharedState]] instead of reusing the existing one. The
+   * `finally` block restores the original active/default session
+   * so the test's main session is not disrupted.
+   *
+   * This is not a true external process (same JVM), but it is
+   * sufficient: session1's [[CacheManager]] is unaware of session2's
+   * writes, and [[catalog.loadTable]] reads from the shared static
+   * map, returning the latest metadata.
+   */
+  private def withExtSession(f: SparkSession => Unit): Unit = {
+    val savedActive = SparkSession.getActiveSession
+    val savedDefault = SparkSession.getDefaultSession
+    val extSession = try {
+      SparkSession.clearActiveSession()
+      SparkSession.clearDefaultSession()
+      SparkSession.builder()
+        .sparkContext(spark.sparkContext)
+        .create()
+    } finally {
+      savedDefault.foreach(s =>
+        SparkSession.setDefaultSession(s))
+      savedActive.foreach(s =>
+        SparkSession.setActiveSession(s))
+    }
+    f(extSession)
+  }
+
+  private val T = "sharedcat.ns.tbl"
+
+  test("external write visible via fresh query") {
+    withTable(T) {
+      sql(s"CREATE TABLE $T (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $T VALUES (1, 100)")
+
+      checkAnswer(spark.table(T), Seq(Row(1, 100)))
+
+      // external session writes data
+      withExtSession { ext =>
+        ext.sql(s"INSERT INTO $T VALUES (2, 200)").collect()
+      }
+
+      // a fresh query from session1 picks up external write
+      checkAnswer(
+        spark.table(T),
+        Seq(Row(1, 100), Row(2, 200)))
+    }
+  }
+
+  // spark.table(T) captures column IDs at analysis time. When
+  // an external session drops and re-adds a column, the column gets
+  // a new ID. Session1's stale DataFrame detects the mismatch.
+  test("external drop+re-add column detected by column ID") {
+    withTable(T) {
+      sql(s"CREATE TABLE $T (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $T VALUES (1, 100)")
+
+      val df = spark.table(T)
+
+      // external session drops and re-adds column
+      withExtSession { ext =>
+        ext.sql(s"ALTER TABLE $T DROP COLUMN salary").collect()
+        ext.sql(s"ALTER TABLE $T ADD COLUMN salary INT").collect()
+      }
+
+      // column ID changed, session1 detects it
+      checkError(
+        exception = intercept[AnalysisException] {
+          df.collect()
+        },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map(
+          "tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+
+  test("external drop+recreate table detected by column ID") {
+    withTable(T) {
+      sql(s"CREATE TABLE $T (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $T VALUES (1, 100)")
+      val df = spark.table(T)
+
+      // external session drops and recreates table
+      withExtSession { ext =>
+        ext.sql(s"DROP TABLE $T").collect()
+        ext.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect()
+      }
+
+      // table ID is null (SharedInMemoryTableCatalog extends
+      // NullTableIdInMemoryTableCatalog), so column ID check catches it
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> "(?s).*"))
+    }
+  }
+
+  // Adding a column from an external session preserves existing column IDs.
+  // The stale DataFrame should still work because its captured column IDs
+  // match the current table's unchanged columns.
+  test("external add column does not trigger column ID mismatch") {
+    withTable(T) {
+      sql(s"CREATE TABLE $T (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $T VALUES (1, 100)")
+
+      val df = spark.table(T)
+
+      // external session adds a new column
+      withExtSession { ext =>
+        ext.sql(s"ALTER TABLE $T ADD COLUMN bonus INT").collect()
+        ext.sql(s"INSERT INTO $T VALUES (2, 200, 50)").collect()
+      }
+
+      // session1's stale DataFrame still works: id and salary IDs unchanged
+      checkAnswer(df, Seq(Row(1, 100), Row(2, 200)))
+    }
+  }
+
+  test("external drop+re-add multiple columns detected by column ID") {
+    withTable(T) {
+      sql(s"CREATE TABLE $T (id INT, salary INT, bonus INT) USING foo")
+      sql(s"INSERT INTO $T VALUES (1, 100, 10)")
+
+      val df = spark.table(T)
+
+      // external session drops and re-adds both salary and bonus
+      withExtSession { ext =>
+        ext.sql(s"ALTER TABLE $T DROP COLUMN salary").collect()
+        ext.sql(s"ALTER TABLE $T DROP COLUMN bonus").collect()
+        ext.sql(s"ALTER TABLE $T ADD COLUMN salary INT").collect()
+        ext.sql(s"ALTER TABLE $T ADD COLUMN bonus INT").collect()
+      }
+
+      // both column ID mismatches are detected
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*",
+          "errors" -> "(?s).*salary.*bonus.*"))
+    }
+  }
+
+  test("external type widening detected by data columns validation") {
+    withTable(T) {
+      sql(s"CREATE TABLE $T (id INT, salary INT) USING foo")
+      sql(s"INSERT INTO $T VALUES (1, 100)")
+
+      val df = spark.table(T)
+
+      // external session widens salary from INT to LONG
+      // SharedInMemoryTableCatalog preserves the column ID across type
+      // changes, so data columns validation catches the type mismatch
+      // instead of the column ID check
+      withExtSession { ext =>
+        ext.sql(s"ALTER TABLE $T ALTER COLUMN salary TYPE LONG").collect()
+      }
+
+      checkError(
+        exception = intercept[AnalysisException] { df.collect() },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataTableSuite.scala
new file mode 100644
index 0000000000000..37acbf1e0442f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataTableSuite.scala
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
+import org.apache.spark.sql.connector.catalog.{Identifier, MetadataTable, Table, TableCatalog, TableChange, TableInfo, TableSummary}
+import org.apache.spark.sql.connector.expressions.LogicalExpressions
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+/**
+ * Tests for the data-source-table side of [[MetadataTable]]: a v2 catalog returns
+ * metadata-only tables and Spark reads / writes them via the V1 data-source path.
+ * View-related paths live in [[DataSourceV2MetadataViewSuite]].
+ */
+class DataSourceV2MetadataTableSuite extends QueryTest with SharedSparkSession {
+  import testImplicits._
+
+  override def sparkConf: SparkConf = super.sparkConf
+    .set(
+      "spark.sql.catalog.table_catalog",
+      classOf[TestingDataSourceTableCatalog].getName)
+
+  test("file source table") {
+    withTempPath { path =>
+      val loc = path.getCanonicalPath
+      val tableName = s"table_catalog.`$loc`.test_json"
+
+      spark.range(10).select($"id".cast("string").as("col")).write.json(loc)
+      checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString)))
+
+      sql(s"INSERT INTO $tableName SELECT 'abc'")
+      checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString)) :+ Row("abc"))
+
+      sql(s"INSERT OVERWRITE $tableName SELECT 'xyz'")
+      checkAnswer(spark.table(tableName), Row("xyz"))
+    }
+  }
+
+  test("partitioned file source table") {
+    withTempPath { path =>
+      val loc = path.getCanonicalPath
+      val tableName = s"table_catalog.`$loc`.test_partitioned_json"
+
+      Seq(1 -> 1, 2 -> 1).toDF("c1", "c2").write.partitionBy("c2").json(loc)
+      checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1)))
+
+      sql(s"INSERT INTO $tableName SELECT 1, 2")
+      checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(1, 2)))
+
+      sql(s"INSERT INTO $tableName PARTITION(c2=3) SELECT 1")
+      checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(1, 2), Row(1, 3)))
+
+      sql(s"INSERT OVERWRITE $tableName PARTITION(c2=2) SELECT 10")
+      checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(10, 2), Row(1, 3)))
+
+      sql(s"INSERT OVERWRITE $tableName SELECT 20, 20")
+      checkAnswer(spark.table(tableName), Row(20, 20))
+    }
+  }
+
+  // TODO: move the v2 data source table handling from V2SessionCatalog to the analyzer
+  ignore("v2 data source table") {
+    val tableName = "table_catalog.default.test_v2"
+    checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i)))
+  }
+
+  test("fully-qualified column reference uses the real catalog name") {
+    withTempPath { path =>
+      val loc = path.getCanonicalPath
+      val tableName = s"table_catalog.`$loc`.test_json"
+
+      spark.range(3).select($"id".cast("string").as("col")).write.json(loc)
+
+      // 1-part and 2-part references resolve via last-part suffix matching.
+      checkAnswer(
+        sql(s"SELECT test_json.col FROM $tableName"),
+        Seq(Row("0"), Row("1"), Row("2")))
+      checkAnswer(
+        sql(s"SELECT `$loc`.test_json.col FROM $tableName"),
+        Seq(Row("0"), Row("1"), Row("2")))
+
+      // 3-part reference uses the real catalog name. `V1Table.toCatalogTable` sets
+      // `CatalogTable.multipartIdentifier` to `[table_catalog, <loc>, test_json]`; the
+      // SessionCatalog change in this PR makes `getRelation` prefer that over the hardcoded
+      // `spark_catalog` qualifier, so the SubqueryAlias carries the real catalog and this
+      // 3-part column ref resolves.
+      checkAnswer(
+        sql(s"SELECT $tableName.col FROM $tableName"),
+        Seq(Row("0"), Row("1"), Row("2")))
+    }
+  }
+}
+
+/**
+ * A read-only [[TableCatalog]] that returns [[MetadataTable]] for a small set of canned
+ * table fixtures. Used to drive the data-source-table read path (file source + v2 provider)
+ * through Spark's V1 data-source machinery.
+ */
+class TestingDataSourceTableCatalog extends TableCatalog {
+  override def loadTable(ident: Identifier): Table = ident.name() match {
+    case "test_json" =>
+      val info = new TableInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withProvider("json")
+        .withLocation(ident.namespace().head)
+        .withTableType(TableSummary.EXTERNAL_TABLE_TYPE)
+        .build()
+      new MetadataTable(info, ident.toString)
+    case "test_partitioned_json" =>
+      val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2")))
+      val info = new TableInfo.Builder()
+        .withSchema(new StructType().add("c1", "int").add("c2", "int"))
+        .withProvider("json")
+        .withLocation(ident.namespace().head)
+        .withTableType(TableSummary.EXTERNAL_TABLE_TYPE)
+        .withPartitions(Array(partitioning))
+        .build()
+      new MetadataTable(info, ident.toString)
+    case "test_v2" =>
+      val info = new TableInfo.Builder()
+        .withSchema(FakeV2Provider.schema)
+        .withProvider(classOf[FakeV2Provider].getName)
+        .build()
+      new MetadataTable(info, ident.toString)
+    case _ => throw new NoSuchTableException(ident)
+  }
+
+  override def createTable(ident: Identifier, info: TableInfo): Table =
+    throw new RuntimeException("shouldn't be called")
+  override def alterTable(ident: Identifier, changes: TableChange*): Table =
+    throw new RuntimeException("shouldn't be called")
+  override def dropTable(ident: Identifier): Boolean =
+    throw new RuntimeException("shouldn't be called")
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit =
+    throw new RuntimeException("shouldn't be called")
+  override def listTables(namespace: Array[String]): Array[Identifier] =
+    throw new RuntimeException("shouldn't be called")
+
+  private var catalogName = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+  }
+  override def name(): String = catalogName
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataViewSuite.scala
new file mode 100644
index 0000000000000..163c0957e0d0e
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataViewSuite.scala
@@ -0,0 +1,669 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, NoSuchViewException, TableAlreadyExistsException, ViewAlreadyExistsException}
+import org.apache.spark.sql.connector.catalog.{Identifier, MetadataTable, Table, TableCatalog, TableChange, TableInfo, TableSummary, TableViewCatalog, V1Table, ViewCatalog, ViewInfo}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+/**
+ * Tests for the view side of [[MetadataTable]]: view-text expansion on read, and
+ * CREATE VIEW / ALTER VIEW ... AS going through the v2 write path
+ * (`CreateV2ViewExec` / `AlterV2ViewExec`). View writes route through
+ * [[ViewCatalog#createView]] / [[ViewCatalog#replaceView]].
+ * Data-source-table read paths live in
+ * [[org.apache.spark.sql.connector.DataSourceV2MetadataTableSuite]].
+ *
+ * TODO: register a `MetadataTable`-backed `DelegatingCatalogExtension` as
+ * `spark.sql.catalog.spark_catalog` and run the shared
+ * [[org.apache.spark.sql.execution.PersistedViewTestSuite]] body against the v2 path for full
+ * parity with the v1 persisted-view coverage.
+ */
+class DataSourceV2MetadataViewSuite extends QueryTest with SharedSparkSession {
+  import testImplicits._
+
+  override def sparkConf: SparkConf = super.sparkConf
+    .set("spark.sql.catalog.view_catalog", classOf[TestingTableViewCatalog].getName)
+
+  // --- View read path -----------------------------------------------------
+
+  test("read view expands SQL text and applies captured SQL configs") {
+    withTable("spark_catalog.default.t") {
+      Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t")
+      // view_catalog.ansi.test_view stores view.sqlConfig.spark.sql.ansi.enabled=true;
+      // view_catalog.non_ansi.test_view stores the same key with value `false`. The view body
+      // does `col::int` which errors in ANSI mode and yields NULL in non-ANSI mode.
+      intercept[Exception](spark.table("view_catalog.ansi.test_view").collect())
+      checkAnswer(spark.table("view_catalog.non_ansi.test_view"), Row("b", null))
+    }
+  }
+
+  test("read view resolves unqualified refs via captured current catalog/namespace") {
+    withTable("spark_catalog.default.t") {
+      Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t")
+      // View text uses the unqualified name `t`; it resolves via the stored
+      // current catalog / namespace properties.
+      checkAnswer(spark.table("view_catalog.ns.test_unqualified_view"), Row("b"))
+    }
+  }
+
+  test("read view resolves unqualified refs via multi-part captured namespace") {
+    // End-to-end coverage of the v2 encoder -> parser round-trip: test_unqualified_multi is a
+    // view whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part namespace) and
+    // whose body references `t` unqualified. At read time the unqualified `t` must expand to
+    // view_catalog.ns1.ns2.t via the captured context -- which TestingTableViewCatalog resolves to
+    // its own `t` fixture at that namespace.
+    checkAnswer(
+      spark.table("view_catalog.outer_ns.test_unqualified_multi"),
+      Row("multi"))
+  }
+
+  // --- ViewInfo unit tests -----------------------------------------------
+
+  test("multi-part captured namespace round-trips through V1Table.toCatalogTable") {
+    // (a) ViewInfo.Builder stores (cat, Array(db1, db2)) as typed fields.
+    // (b) V1Table.toCatalogTable reads them directly and emits v1's numbered
+    //     view.catalogAndNamespace.* keys so (c) the resulting CatalogTable's
+    //     `viewCatalogAndNamespace` exposes the full (cat, db1, db2), which is what the v1
+    //     view-resolution path consumes to expand unqualified references in the view body.
+    val info = new ViewInfo.Builder()
+      .withSchema(new StructType().add("col", "string"))
+      .withQueryText("SELECT col FROM t")
+      .withCurrentCatalog("my_cat")
+      .withCurrentNamespace(Array("db1", "db2"))
+      .build()
+    val motTable = new MetadataTable(info, "v")
+    // Any CatalogPlugin works here; toCatalogTable only reads `catalog.name()`.
+    val catalog = spark.sessionState.catalogManager.catalog("view_catalog")
+    val ct = V1Table.toCatalogTable(
+      catalog, Identifier.of(Array("ns"), "v"), motTable)
+    assert(ct.viewCatalogAndNamespace == Seq("my_cat", "db1", "db2"))
+
+    // Namespace parts containing dots flow through structurally (no string encoding).
+    val infoWeird = new ViewInfo.Builder()
+      .withSchema(new StructType().add("col", "string"))
+      .withQueryText("SELECT col FROM t")
+      .withCurrentCatalog("my_cat")
+      .withCurrentNamespace(Array("weird.db", "normal"))
+      .build()
+    val ctWeird = V1Table.toCatalogTable(
+      catalog, Identifier.of(Array("ns"), "v"), new MetadataTable(infoWeird, "v"))
+    assert(ctWeird.viewCatalogAndNamespace == Seq("my_cat", "weird.db", "normal"))
+  }
+
+  test("view with no captured catalog omits viewCatalogAndNamespace") {
+    val info = new ViewInfo.Builder()
+      .withSchema(new StructType().add("col", "string"))
+      .withQueryText("SELECT * FROM spark_catalog.default.t")
+      .build()
+    val motTable = new MetadataTable(info, "v")
+    val catalog = spark.sessionState.catalogManager.catalog("view_catalog")
+    val ct = V1Table.toCatalogTable(catalog, Identifier.of(Array("ns"), "v"), motTable)
+    assert(ct.viewCatalogAndNamespace.isEmpty)
+  }
+
+  // CREATE VIEW behavior tests live in the per-catalog triplet
+  // `sql.execution.command.{,v1/,v2/}.CreateViewSuite{,Base}`.
+
+  // ALTER VIEW behavior tests live in the per-catalog triplet
+  // `sql.execution.command.{,v1/,v2/}.AlterViewAsSuite{,Base}`.
+
+  // --- Pure ViewCatalog (no TableCatalog mixin) ---------------------------
+
+  test("read view from a pure ViewCatalog (no TableCatalog mixin)") {
+    // The analyzer's table-side lookup must skip `loadTable` entirely for catalogs that don't
+    // implement `TableCatalog`; otherwise `asTableCatalog` would throw
+    // MISSING_CATALOG_ABILITY.TABLES and the legitimate `loadView` fallback would never run.
+    withSQLConf(
+      "spark.sql.catalog.view_only" -> classOf[TestingViewOnlyCatalog].getName) {
+      withTable("spark_catalog.default.t") {
+        Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+        // The fixture stores a `pure_v` view whose body filters spark_catalog.default.t.
+        checkAnswer(spark.table("view_only.default.pure_v"), Seq(Row(2), Row(3)))
+      }
+    }
+  }
+
+  test("ALTER VIEW on a pure ViewCatalog (no TableCatalog mixin)") {
+    withSQLConf(
+      "spark.sql.catalog.view_only" -> classOf[TestingViewOnlyCatalog].getName) {
+      val catalog = spark.sessionState.catalogManager.catalog("view_only")
+        .asInstanceOf[TestingViewOnlyCatalog]
+      withTable("spark_catalog.default.t") {
+        Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+        sql("ALTER VIEW view_only.default.pure_v AS " +
+          "SELECT x FROM spark_catalog.default.t WHERE x > 2")
+        assert(catalog.loadView(Identifier.of(Array("default"), "pure_v")).queryText() ==
+          "SELECT x FROM spark_catalog.default.t WHERE x > 2")
+      }
+    }
+  }
+
+  test("cyclic detection distinguishes views across multi-level namespaces") {
+    withTable("spark_catalog.default.t") {
+      Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+
+      // Two views whose last namespace segment collides (`inner`) but whose full multi-part
+      // identifiers differ -- pin that cyclic detection compares them by `fullIdent`
+      // (`view_catalog.ns1.inner.v` vs `view_catalog.ns2.inner.v`) and not by the lossy
+      // 3-part `TableIdentifier` form, which would collapse both to
+      // `TableIdentifier(v, Some("inner"), Some("view_catalog"))` and false-positive on
+      // legitimate cross-namespace REPLACE.
+      sql("CREATE VIEW view_catalog.ns1.inner.v AS SELECT x FROM spark_catalog.default.t")
+      sql("CREATE VIEW view_catalog.ns2.inner.v AS " +
+        "SELECT x FROM view_catalog.ns1.inner.v")
+      // Legitimate non-cyclic REPLACE -- new body references a different view that happens to
+      // share the last namespace segment. Must not false-positive.
+      sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.v AS " +
+        "SELECT x FROM spark_catalog.default.t WHERE x > 1")
+      checkAnswer(spark.table("view_catalog.ns1.inner.v"), Seq(Row(2), Row(3)))
+
+      // Real cycle across the two namespaces must still be caught.
+      val ex = intercept[AnalysisException] {
+        sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.v AS " +
+          "SELECT x FROM view_catalog.ns2.inner.v")
+      }
+      assert(ex.getCondition == "RECURSIVE_VIEW")
+    }
+  }
+
+  test("view error messages render the full multi-level namespace") {
+    withTable("spark_catalog.default.t") {
+      Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+      sql("CREATE VIEW view_catalog.ns1.inner.v_err AS " +
+        "SELECT x FROM spark_catalog.default.t")
+      // Second CREATE surfaces `viewAlreadyExistsError` (via TableAlreadyExistsException from
+      // the catalog). Pin that the rendered name carries every multi-level-namespace segment
+      // (`view_catalog.ns1.inner.v_err`) -- routing the name through `Seq[String]` rather
+      // than a 3-part `TableIdentifier` is what preserves the outer `ns1` segment in the
+      // user-visible message.
+      val dup = intercept[AnalysisException] {
+        sql("CREATE VIEW view_catalog.ns1.inner.v_err AS " +
+          "SELECT x FROM spark_catalog.default.t")
+      }
+      assert(dup.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS")
+      assert(dup.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_err`"),
+        s"expected full multi-part name in error, got: ${dup.getMessage}")
+
+      // CREATE OR REPLACE VIEW over a non-view table entry surfaces
+      // `unsupportedCreateOrReplaceViewOnTableError`. Pre-seed a non-view entry at a
+      // multi-level-namespace identifier to exercise the rendering.
+      val catalog = spark.sessionState.catalogManager.catalog("view_catalog")
+        .asInstanceOf[TestingTableViewCatalog]
+      val tblIdent = Identifier.of(Array("ns1", "inner"), "t_err")
+      catalog.createTable(
+        tblIdent,
+        new TableInfo.Builder()
+          .withSchema(new StructType().add("col", "string"))
+          .withTableType(TableSummary.EXTERNAL_TABLE_TYPE)
+          .build())
+      try {
+        val notView = intercept[AnalysisException] {
+          sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.t_err AS " +
+            "SELECT x FROM spark_catalog.default.t")
+        }
+        assert(notView.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE")
+        assert(notView.getMessage.contains("`view_catalog`.`ns1`.`inner`.`t_err`"),
+          s"expected full multi-part name in error, got: ${notView.getMessage}")
+      } finally {
+        catalog.dropTable(tblIdent)
+      }
+
+      // Column-arity mismatch error.
+      val arity = intercept[AnalysisException] {
+        sql("CREATE VIEW view_catalog.ns1.inner.v_arity (a, b) AS " +
+          "SELECT x FROM spark_catalog.default.t")
+      }
+      assert(arity.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_arity`"),
+        s"expected full multi-part name in error, got: ${arity.getMessage}")
+    }
+  }
+
+  test("ALTER VIEW cyclic detection distinguishes views across multi-level namespaces") {
+    withTable("spark_catalog.default.t") {
+      Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+
+      sql("CREATE VIEW view_catalog.ns1.inner.v_alter AS " +
+        "SELECT x FROM spark_catalog.default.t")
+      sql("CREATE VIEW view_catalog.ns2.inner.v_alter AS " +
+        "SELECT x FROM view_catalog.ns1.inner.v_alter")
+
+      // Legitimate non-cyclic ALTER -- the new body references `spark_catalog.default.t`,
+      // not the view being altered. Pin that ALTER's cyclic detection compares views by
+      // `fullIdent` so the two `inner.v_alter` views in different namespaces stay distinct;
+      // a comparison via the lossy 3-part `TableIdentifier` would collapse both to
+      // `TableIdentifier(v_alter, Some("inner"), Some("view_catalog"))` and false-positive
+      // here.
+      sql("ALTER VIEW view_catalog.ns1.inner.v_alter AS " +
+        "SELECT x FROM spark_catalog.default.t WHERE x > 1")
+      checkAnswer(
+        spark.table("view_catalog.ns1.inner.v_alter"),
+        Seq(Row(2), Row(3)))
+
+      // Real cycle across the two namespaces must still be caught.
+      val ex = intercept[AnalysisException] {
+        sql("ALTER VIEW view_catalog.ns1.inner.v_alter AS " +
+          "SELECT x FROM view_catalog.ns2.inner.v_alter")
+      }
+      assert(ex.getCondition == "RECURSIVE_VIEW")
+    }
+  }
+
+  test("temp-object reference errors render the full multi-level namespace") {
+    // `verifyTemporaryObjectsNotExists` / `verifyAutoGeneratedAliasesNotExists` route the
+    // view name through `Seq[String]` rather than a 3-part `TableIdentifier`, so a
+    // temp-function reference inside `view_catalog.ns1.inner.v_tempfn` surfaces an error
+    // naming the full multi-part identifier. Routing through `asLegacyTableIdentifier`
+    // would collapse `ns1.inner` to the last segment and drop the outer `ns1` from the
+    // user-visible message.
+    withTable("spark_catalog.default.t") {
+      Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+      spark.udf.register("temp_udf_multi", (i: Int) => i + 1)
+      val ex = intercept[AnalysisException] {
+        sql("CREATE VIEW view_catalog.ns1.inner.v_tempfn AS " +
+          "SELECT temp_udf_multi(x) FROM spark_catalog.default.t")
+      }
+      assert(ex.getCondition == "INVALID_TEMP_OBJ_REFERENCE")
+      assert(ex.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_tempfn`"),
+        s"expected full multi-part name, got: ${ex.getMessage}")
+    }
+  }
+
+  // --- v2 view DDL / inspection on a non-session v2 catalog ----------------------------
+  // ResolveSessionCatalog's `ResolvedViewIdentifier` matcher is gated on isSessionCatalog, so
+  // these plans flow through to DataSourceV2Strategy with a `ResolvedPersistentView` child.
+  // Each is handled by a dedicated v2 exec defined alongside the v1 commands.
+
+  private def seedV2View(name: String): Unit = {
+    sql(s"CREATE VIEW view_catalog.default.$name AS SELECT 1 AS x")
+  }
+
+  // Used by the REFRESH / ANALYZE pins below: those plans still don't have a v2 implementation
+  // and surface UNSUPPORTED_FEATURE.TABLE_OPERATION via DataSourceV2Strategy.
+  private def assertUnsupportedViewOp(statement: String): Unit = {
+    val ex = intercept[AnalysisException](sql(statement))
+    assert(ex.getCondition == "UNSUPPORTED_FEATURE.TABLE_OPERATION", s"got ${ex.getCondition}")
+  }
+
+  // SET / UNSET / SCHEMA / RENAME / SHOW CREATE / SHOW TBLPROPERTIES / SHOW COLUMNS /
+  // DESCRIBE TABLE on a v2 view live in the per-catalog test triplets under
+  // `sql.execution.command.{,v1/,v2/}`; see e.g. AlterViewSetTblPropertiesSuite{,Base}.
+
+  // These plans reach `DataSourceV2Strategy` with a `ResolvedPersistentView` child on a
+  // non-session v2 view (because `ResolvedV1TableOrViewIdentifier` now skips non-session views).
+  // Without explicit pins they would hit `QueryPlanner`'s `assert(pruned.hasNext, "No plan for
+  // ...")` and surface a raw AssertionError. Pin each to UNSUPPORTED_FEATURE.TABLE_OPERATION.
+
+  test("REFRESH TABLE on a v2 view is rejected") {
+    seedV2View("v_refresh")
+    assertUnsupportedViewOp("REFRESH TABLE view_catalog.default.v_refresh")
+  }
+
+  test("ANALYZE TABLE on a v2 view is rejected") {
+    seedV2View("v_analyze")
+    assertUnsupportedViewOp(
+      "ANALYZE TABLE view_catalog.default.v_analyze COMPUTE STATISTICS")
+  }
+
+  test("ANALYZE TABLE ... FOR COLUMNS on a v2 view is rejected") {
+    seedV2View("v_analyze_cols")
+    assertUnsupportedViewOp(
+      "ANALYZE TABLE view_catalog.default.v_analyze_cols COMPUTE STATISTICS FOR COLUMNS x")
+  }
+
+  test("DESCRIBE TABLE ... PARTITION on a v2 view is rejected") {
+    // The parser builds an `UnresolvedTableOrView` for DESCRIBE, so this reaches the v2
+    // strategy with a `ResolvedPersistentView` child. Without an explicit pin the planner
+    // falls through to a "No plan for DescribeTablePartition" assertion; pin it with
+    // FORBIDDEN_OPERATION/DESC PARTITION on VIEW to mirror the v1 runtime check in
+    // `DescribeTableCommand.describeDetailedPartitionInfo`.
+    seedV2View("v_desc_part")
+    val ex = intercept[AnalysisException] {
+      sql("DESCRIBE TABLE view_catalog.default.v_desc_part PARTITION (x = 1)")
+    }
+    assert(ex.getCondition == "FORBIDDEN_OPERATION", s"got ${ex.getCondition}")
+  }
+
+  test("DESCRIBE TABLE EXTENDED ... AS JSON on a v2 view succeeds") {
+    // `DescribeRelationJsonCommand` is a v1 runnable command that reads v1-shaped fields off
+    // a `CatalogTable`. For non-session v2 views the resolved `ResolvedPersistentView.info`
+    // is a plain `ViewInfo`; the command projects it to a `CatalogTable` via
+    // `V1Table.toCatalogTable` so DESC ... AS JSON works uniformly across session and
+    // non-session view catalogs.
+    seedV2View("v_desc_json")
+    val rows = sql(
+      "DESCRIBE TABLE EXTENDED view_catalog.default.v_desc_json AS JSON").collect()
+    assert(rows.length == 1, s"DESC AS JSON should produce one row, got: ${rows.length}")
+    val json = rows.head.getString(0)
+    assert(json.contains("\"v_desc_json\""), s"JSON output missing view name: $json")
+    assert(json.contains("\"VIEW\""), s"JSON output missing VIEW table_type: $json")
+  }
+
+  // DROP VIEW behavior tests live in the per-catalog triplet
+  // `sql.execution.command.{,v1/,v2/}.DropViewSuite{,Base}`.
+
+  // --- SHOW TABLES / SHOW VIEWS on a v2 catalog --------------------------------
+
+  private def seedV2Table(name: String): Unit = {
+    val catalog = spark.sessionState.catalogManager.catalog("view_catalog")
+      .asInstanceOf[TestingTableViewCatalog]
+    catalog.createTable(
+      Identifier.of(Array("default"), name),
+      new TableInfo.Builder()
+        .withSchema(new StructType().add("x", "int"))
+        .withTableType(TableSummary.EXTERNAL_TABLE_TYPE)
+        .build())
+  }
+
+  test("SHOW TABLES on a TableViewCatalog returns both tables and views (v1-parity)") {
+    // For a `TableViewCatalog` (a catalog exposing both tables and views in a shared
+    // identifier namespace), SHOW TABLES routes through `listTableAndViewSummaries` so views
+    // appear alongside tables -- matching the v1 SHOW TABLES output. Pure `TableCatalog`
+    // catalogs (no view mixin) continue to use `listTables` and return tables only.
+    seedV2View("v_in_show_tables")
+    seedV2Table("t_in_show_tables")
+    val rows = sql("SHOW TABLES IN view_catalog.default").collect()
+    val names = rows.map(_.getString(1)).toSet
+    assert(names.contains("t_in_show_tables"), s"table missing from SHOW TABLES: $names")
+    assert(names.contains("v_in_show_tables"), s"view missing from SHOW TABLES: $names")
+    rows.foreach(r => assert(!r.getBoolean(2), s"isTemporary must be false: $r"))
+  }
+
+  // SHOW VIEWS behavior tests live in the per-catalog triplet
+  // `sql.execution.command.{,v1/,v2/}.ShowViewsSuite{,Base}`.
+}
+
+/**
+ * A [[TableViewCatalog]]: round-trips [[MetadataTable]] for created views and tables and
+ * exposes a few canned read-only view fixtures (`test_view`, `test_unqualified_view`,
+ * `test_unqualified_multi`, plus an unqualified-target view at `ns1.ns2.t`) used by the
+ * view-read tests. Entries created via `createTable` / `createView` are distinguished by the
+ * stored value's runtime type (ViewInfo vs TableInfo). The single-RPC perf entry point
+ * [[loadTableOrView]] returns either kind; [[loadTable]] is tables-only per the
+ * [[TableCatalog#loadTable]] contract.
+ */
+class TestingTableViewCatalog extends TableViewCatalog {
+
+  // Holds entries (views and tables) created via createTable / createView within the session.
+  // Keyed by (namespace, name); the stored value's runtime type (ViewInfo vs TableInfo)
+  // distinguishes views from tables. Mixed-catalog: shared identifier namespace per the
+  // TableViewCatalog contract.
+  private val createdViews =
+    new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]()
+
+  // Canned read-only view fixtures, exposed only via the perf path (loadTableOrView). loadView
+  // does not need to expose them because the resolver routes TableViewCatalog reads through
+  // loadTableOrView.
+  private def fixtureView(ident: Identifier): Option[ViewInfo] = ident.name() match {
+    case "test_view" =>
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string").add("i", "int"))
+        .withQueryText(
+          "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'")
+        .withSqlConfigs(java.util.Collections.singletonMap(
+          SQLConf.ANSI_ENABLED.key, (ident.namespace().head == "ansi").toString))
+        .build())
+    case "test_unqualified_view" =>
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withQueryText("SELECT col FROM t WHERE col = 'b'")
+        .withCurrentCatalog("spark_catalog")
+        .withCurrentNamespace(Array("default"))
+        .build())
+    case "test_unqualified_multi" =>
+      // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). The
+      // unqualified `t` in the body must resolve via that captured context to
+      // view_catalog.ns1.ns2.t, which this catalog also serves (see `t` case below).
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withQueryText("SELECT col FROM t")
+        .withCurrentCatalog("view_catalog")
+        .withCurrentNamespace(Array("ns1", "ns2"))
+        .build())
+    case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") =>
+      // Target of test_unqualified_multi's unqualified reference. Self-contained view so
+      // the test doesn't need external data.
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withQueryText("SELECT 'multi' AS col")
+        .build())
+    case _ => None
+  }
+
+  override def loadTableOrView(ident: Identifier): Table = {
+    // Single-RPC perf path: returns tables AND views (as MetadataTable). Stored entries
+    // win over fixture views (the fixture namespace is read-only and disjoint from
+    // createdViews in practice). loadTable, loadView, tableExists, viewExists all derive
+    // from this via the TableViewCatalog default impls.
+    val key = (ident.namespace().toSeq, ident.name())
+    Option(createdViews.get(key))
+      .orElse(fixtureView(ident))
+      .map(new MetadataTable(_, ident.toString))
+      .getOrElse(throw new NoSuchTableException(ident))
+  }
+
+  override def createTable(ident: Identifier, info: TableInfo): Table = {
+    // Mixed-catalog contract: createTable rejects when a view sits at ident with
+    // TableAlreadyExistsException. The shared `createdViews` keyspace makes `putIfAbsent`
+    // throw uniformly for both table-at-ident and view-at-ident collisions.
+    val key = (ident.namespace().toSeq, ident.name())
+    if (createdViews.putIfAbsent(key, info) != null) {
+      throw new TableAlreadyExistsException(ident)
+    }
+    new MetadataTable(info, ident.toString)
+  }
+
+  /** Test-only accessor: returns the stored TableInfo (table or view) for the identifier. */
+  def getStoredInfo(namespace: Array[String], name: String): TableInfo = {
+    Option(createdViews.get((namespace.toSeq, name))).getOrElse {
+      throw new NoSuchTableException(Identifier.of(namespace, name))
+    }
+  }
+
+  /** Test-only accessor: returns the stored ViewInfo; fails if the entry is not a view. */
+  def getStoredView(namespace: Array[String], name: String): ViewInfo = getStoredInfo(
+    namespace, name) match {
+    case v: ViewInfo => v
+    case _ => throw new IllegalStateException(
+      s"stored entry at ${namespace.mkString(".")}.$name is not a view")
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    throw new RuntimeException("shouldn't be called")
+  }
+  override def dropTable(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = createdViews.get(key)
+    if (existing == null || existing.isInstanceOf[ViewInfo]) return false
+    createdViews.remove(key) != null
+  }
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    throw new RuntimeException("shouldn't be called")
+  }
+  override def listTables(namespace: Array[String]): Array[Identifier] = {
+    // Tables only -- views are listed via ViewCatalog.listViews per the new contract.
+    val targetNs = namespace.toSeq
+    val ids = new java.util.ArrayList[Identifier]()
+    createdViews.forEach { (key, info) =>
+      if (key._1 == targetNs && !info.isInstanceOf[ViewInfo]) {
+        ids.add(Identifier.of(key._1.toArray, key._2))
+      }
+    }
+    ids.toArray(new Array[Identifier](0))
+  }
+
+  // ViewCatalog methods. Storage is shared with TableCatalog (mixed-catalog pattern).
+
+  override def listViews(namespace: Array[String]): Array[Identifier] = {
+    val targetNs = namespace.toSeq
+    val ids = new java.util.ArrayList[Identifier]()
+    createdViews.forEach { (key, info) =>
+      if (key._1 == targetNs && info.isInstanceOf[ViewInfo]) {
+        ids.add(Identifier.of(key._1.toArray, key._2))
+      }
+    }
+    ids.toArray(new Array[Identifier](0))
+  }
+
+  override def createView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    if (createdViews.putIfAbsent(key, info) != null) {
+      throw new ViewAlreadyExistsException(ident)
+    }
+    info
+  }
+
+  override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = createdViews.get(key)
+    if (existing == null || !existing.isInstanceOf[ViewInfo]) {
+      throw new NoSuchViewException(ident)
+    }
+    createdViews.put(key, info)
+    info
+  }
+
+  override def dropView(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = createdViews.get(key)
+    if (existing == null || !existing.isInstanceOf[ViewInfo]) return false
+    createdViews.remove(key) != null
+  }
+
+  override def renameView(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    val oldKey = (oldIdent.namespace().toSeq, oldIdent.name())
+    val newKey = (newIdent.namespace().toSeq, newIdent.name())
+    val existing = createdViews.get(oldKey)
+    if (existing == null || !existing.isInstanceOf[ViewInfo]) {
+      throw new NoSuchViewException(oldIdent)
+    }
+    if (createdViews.putIfAbsent(newKey, existing) != null) {
+      throw new ViewAlreadyExistsException(newIdent)
+    }
+    createdViews.remove(oldKey)
+  }
+
+  private var catalogName = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+  }
+  override def name(): String = catalogName
+}
+
+/**
+ * A v2 catalog that does not implement ViewCatalog. Used by capability-gate tests: the gate
+ * fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in
+ * [[CheckViewReferences]] for CREATE VIEW -- in both cases before `loadTable` is called --
+ * so this catalog's content is intentionally empty.
+ */
+class TestingTableOnlyCatalog extends TableCatalog {
+  override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident)
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table =
+    throw new RuntimeException("shouldn't be called")
+  override def dropTable(ident: Identifier): Boolean = false
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit =
+    throw new RuntimeException("shouldn't be called")
+  override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty
+  private var catalogName = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+  }
+  override def name(): String = catalogName
+}
+
+/**
+ * A pure [[ViewCatalog]] (no [[TableCatalog]] mixin). Used to exercise that the analyzer's
+ * resolution paths skip the `loadTable` step and fall through to `loadView` for catalogs that
+ * cannot host tables. Pre-seeds a single mutable view at `default.pure_v` so the read and
+ * ALTER VIEW tests can both reach it.
+ */
+class TestingViewOnlyCatalog extends ViewCatalog {
+  private val store =
+    new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), ViewInfo]()
+
+  // Seeded on first `initialize`. Filters `spark_catalog.default.t` so the read test can
+  // assert deterministic output. ALTER VIEW tests overwrite it via `replaceView`.
+  private def seedDefault(): Unit = {
+    val key = (Seq("default"), "pure_v")
+    if (!store.containsKey(key)) {
+      val info = new ViewInfo.Builder()
+        .withSchema(new StructType().add("x", "int"))
+        .withQueryText("SELECT x FROM spark_catalog.default.t WHERE x > 1")
+        .build()
+      store.put(key, info)
+    }
+  }
+
+  override def listViews(namespace: Array[String]): Array[Identifier] = {
+    val target = namespace.toSeq
+    val ids = new java.util.ArrayList[Identifier]()
+    store.forEach { (key, _) =>
+      if (key._1 == target) ids.add(Identifier.of(key._1.toArray, key._2))
+    }
+    ids.toArray(new Array[Identifier](0))
+  }
+
+  override def loadView(ident: Identifier): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    Option(store.get(key)).getOrElse(throw new NoSuchViewException(ident))
+  }
+
+  override def createView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    if (store.putIfAbsent(key, info) != null) {
+      throw new ViewAlreadyExistsException(ident)
+    }
+    info
+  }
+
+  override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val key = (ident.namespace().toSeq, ident.name())
+    if (!store.containsKey(key)) throw new NoSuchViewException(ident)
+    store.put(key, info)
+    info
+  }
+
+  override def dropView(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    store.remove(key) != null
+  }
+
+  override def renameView(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    val oldKey = (oldIdent.namespace().toSeq, oldIdent.name())
+    val newKey = (newIdent.namespace().toSeq, newIdent.name())
+    val existing = store.get(oldKey)
+    if (existing == null) throw new NoSuchViewException(oldIdent)
+    if (store.putIfAbsent(newKey, existing) != null) {
+      throw new ViewAlreadyExistsException(newIdent)
+    }
+    store.remove(oldKey)
+  }
+
+  private var catalogName = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+    seedDefault()
+  }
+  override def name(): String = catalogName
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala
index 30890200df79d..803dd35513f45 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala
@@ -109,7 +109,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
 
       collected = df.queryExecution.executedPlan.collect {
         case CommandResultExec(
-          _, AppendDataExec(_, _, write),
+          _, AppendDataExec(_, _, write, _, _),
           _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#Append]
           assert(append.info.options.get("write.split-size") === "10")
@@ -141,7 +141,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
       assert (collected.size == 1)
 
       collected = qe.executedPlan.collect {
-        case AppendDataExec(_, _, write) =>
+        case AppendDataExec(_, _, write, _, _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#Append]
           assert(append.info.options.get("write.split-size") === "10")
       }
@@ -168,7 +168,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
       assert (collected.size == 1)
 
       collected = qe.executedPlan.collect {
-        case AppendDataExec(_, _, write) =>
+        case AppendDataExec(_, _, write, _, _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#Append]
           assert(append.info.options.get("write.split-size") === "10")
       }
@@ -194,7 +194,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
 
       collected = df.queryExecution.executedPlan.collect {
         case CommandResultExec(
-          _, OverwriteByExpressionExec(_, _, write),
+          _, OverwriteByExpressionExec(_, _, write, _, _),
           _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend]
           assert(append.info.options.get("write.split-size") === "10")
@@ -227,7 +227,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
       assert (collected.size == 1)
 
       collected = qe.executedPlan.collect {
-        case OverwritePartitionsDynamicExec(_, _, write) =>
+        case OverwritePartitionsDynamicExec(_, _, write, _, _) =>
           val dynOverwrite = write.toBatch.asInstanceOf[InMemoryBaseTable#DynamicOverwrite]
           assert(dynOverwrite.info.options.get("write.split-size") === "10")
       }
@@ -254,7 +254,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
 
       collected = df.queryExecution.executedPlan.collect {
         case CommandResultExec(
-          _, OverwriteByExpressionExec(_, _, write),
+          _, OverwriteByExpressionExec(_, _, write, _, _),
           _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend]
           assert(append.info.options.get("write.split-size") === "10")
@@ -287,7 +287,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
       assert (collected.size == 1)
 
       collected = qe.executedPlan.collect {
-        case OverwriteByExpressionExec(_, _, write) =>
+        case OverwriteByExpressionExec(_, _, write, _, _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend]
           assert(append.info.options.get("write.split-size") === "10")
       }
@@ -317,7 +317,7 @@ class DataSourceV2OptionSuite extends DatasourceV2SQLBase {
       assert (collected.size == 1)
 
       collected = qe.executedPlan.collect {
-        case OverwriteByExpressionExec(_, _, write) =>
+        case OverwriteByExpressionExec(_, _, write, _, _) =>
           val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend]
           assert(append.info.options.get("write.split-size") === "10")
       }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index d2cc342f48112..cb7531a0dbafd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -439,6 +439,7 @@ class DataSourceV2SQLSuiteV1Filter
     Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach {
       case (catalog, identifier) =>
         spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT id, data FROM source")
+        checkInsertMetrics(identifier, numInsertedRows = 3)
 
         val table = catalog.loadTable(Identifier.of(Array(), "table_name"))
 
@@ -2966,13 +2967,13 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }
 
-  test("View commands are not supported in v2 catalogs") {
+  test("View commands are not supported in v2 catalogs that don't implement ViewCatalog") {
     def validateViewCommand(sqlStatement: String): Unit = {
       val e = analysisException(sqlStatement)
       checkError(
         e,
-        condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION",
-        parameters = Map("catalogName" -> "`testcat`", "operation" -> "views"))
+        condition = "MISSING_CATALOG_ABILITY.VIEWS",
+        parameters = Map("plugin" -> "testcat"))
     }
 
     validateViewCommand("DROP VIEW testcat.v")
@@ -3912,8 +3913,8 @@ class DataSourceV2SQLSuiteV1Filter
         QueryTest.checkAnswer(
           descriptionDf.filter(
             "!(col_name in ('Catalog', 'Created Time', 'Created By', 'Database', " +
-              "'index', 'Location', 'Name', 'Owner', 'Provider', 'Table', 'Table Properties', " +
-              "'Type', '_partition', ''))"),
+              "'index', 'Location', 'Name', 'Namespace', 'Owner', 'Provider', 'Table', " +
+              "'Table Properties', 'Type', '_partition', ''))"),
           Seq(
             Row("# Detailed Table Information", "", ""),
             Row("# Column Default Values", "", ""),
@@ -4302,6 +4303,28 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }
 
+  test("SPARK-56587: Show table names for V2 write nodes in UI") {
+    val t1 = s"testcat.ns1.ns2.table_name"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id bigint, data string) USING foo")
+      val df1 = sql(s"INSERT INTO $t1 VALUES (1, 'a')")
+      val executed1 = df1.queryExecution.executedPlan
+      assert(executed1.collect {
+        case org.apache.spark.sql.execution.CommandResultExec(
+            _, w: org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec, _) => w
+        case w: org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec => w
+      }.head.nodeName.contains("testcat.ns1.ns2.table_name"))
+
+      val df2 = sql(s"INSERT OVERWRITE $t1 VALUES (2, 'b')")
+      val executed2 = df2.queryExecution.executedPlan
+      assert(executed2.collect {
+        case org.apache.spark.sql.execution.CommandResultExec(
+            _, w: org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec, _) => w
+        case w: org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec => w
+      }.head.nodeName.contains("testcat.ns1.ns2.table_name"))
+    }
+  }
+
   private def testNotSupportedV2Command(
       sqlCommand: String,
       sqlParams: String,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2TableSampleSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2TableSampleSuite.scala
new file mode 100644
index 0000000000000..164c098e95e8a
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2TableSampleSuite.scala
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connector.catalog.{InMemoryTableWithJoinAndSampleCatalog, InMemoryTableWithLegacyTableSampleCatalog, InMemoryTableWithTableSampleCatalog}
+import org.apache.spark.sql.internal.SQLConf
+
+class DataSourceV2TableSampleSuite extends DatasourceV2SQLBase
+  with DataSourcePushdownTestUtils {
+
+  private val sampleCatalog = "testsample"
+
+  private def withSampleTable(testFunc: String => Unit): Unit = {
+    registerCatalog(sampleCatalog, classOf[InMemoryTableWithTableSampleCatalog])
+    val tableName = s"$sampleCatalog.ns.sample_tbl"
+    sql(s"CREATE TABLE $tableName (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $tableName VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')")
+      testFunc(tableName)
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $tableName")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM pushdown removes Sample node") {
+    withSampleTable { table =>
+      val df = sql(s"SELECT * FROM $table TABLESAMPLE SYSTEM (50 PERCENT)")
+      checkSamplePushed(df, pushed = true)
+      checkPushedInfo(df, "SYSTEM SAMPLE (50.0) false SEED(")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE BERNOULLI pushdown removes Sample node") {
+    withSampleTable { table =>
+      val df = sql(s"SELECT * FROM $table TABLESAMPLE BERNOULLI (50 PERCENT)")
+      checkSamplePushed(df, pushed = true)
+      checkPushedInfo(df, "BERNOULLI SAMPLE (50.0) false SEED(")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE default (no qualifier) pushdown removes Sample node") {
+    withSampleTable { table =>
+      val df = sql(s"SELECT * FROM $table TABLESAMPLE (50 PERCENT)")
+      checkSamplePushed(df, pushed = true)
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM 0 PERCENT returns no rows") {
+    withSampleTable { table =>
+      val df = sql(s"SELECT * FROM $table TABLESAMPLE SYSTEM (0 PERCENT)")
+      checkSamplePushed(df, pushed = true)
+      assert(df.collect().isEmpty)
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM 100 PERCENT returns all rows") {
+    withSampleTable { table =>
+      val df = sql(s"SELECT * FROM $table TABLESAMPLE SYSTEM (100 PERCENT)")
+      checkSamplePushed(df, pushed = true)
+      assert(df.collect().length == 5)
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM composes with projection") {
+    withSampleTable { table =>
+      val df = sql(s"SELECT id FROM $table TABLESAMPLE SYSTEM (100 PERCENT)")
+      checkSamplePushed(df, pushed = true)
+      assert(df.columns.sameElements(Array("id")))
+      assert(df.collect().length == 5)
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE on non-pushdown catalog falls back to Sample node") {
+    val table = "testcat.ns.no_sample_tbl"
+    sql(s"CREATE TABLE $table (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      val df = sql(s"SELECT * FROM $table TABLESAMPLE (50 PERCENT)")
+      // testcat uses InMemoryCatalog which does NOT implement SupportsPushDownTableSample,
+      // so the Sample node should remain in the plan.
+      checkSamplePushed(df, pushed = false)
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $table")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM on non-pushdown catalog errors") {
+    val table = "testcat.ns.no_sample_tbl"
+    sql(s"CREATE TABLE $table (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $table VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      // testcat uses InMemoryCatalog whose ScanBuilder does not implement
+      // SupportsPushDownTableSample, so SYSTEM sampling cannot be pushed down.
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(s"SELECT * FROM $table TABLESAMPLE SYSTEM (50 PERCENT)").collect()
+        },
+        condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM")
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $table")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM on subquery errors") {
+    withSampleTable { table =>
+      // SYSTEM sampling requires a direct table scan; applying it to a derived
+      // query (here an aggregate) means there is no ScanBuilderHolder to push into.
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(s"SELECT * FROM (SELECT id, count(*) AS cnt FROM $table GROUP BY id) " +
+            s"TABLESAMPLE SYSTEM (50 PERCENT)").collect()
+        },
+        condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_NO_SCAN")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM with WHERE filter errors") {
+    withSampleTable { table =>
+      // A WHERE clause between the Sample and the scan produces a non-empty filter list
+      // in PhysicalOperation, which falls through to the catch-all error branch.
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(s"SELECT * FROM (SELECT * FROM $table WHERE id > 1) " +
+            s"TABLESAMPLE SYSTEM (50 PERCENT)").collect()
+        },
+        condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_NO_SCAN")
+    }
+  }
+
+  test("SPARK-55978: TABLESAMPLE SYSTEM on DSv1 table errors") {
+    withTable("dsv1_tbl") {
+      sql("CREATE TABLE dsv1_tbl (id bigint, data string) USING parquet")
+      sql("INSERT INTO dsv1_tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      // DSv1 tables have no ScanBuilderHolder, so SYSTEM sampling cannot be pushed down.
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("SELECT * FROM dsv1_tbl TABLESAMPLE SYSTEM (50 PERCENT)").collect()
+        },
+        condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM_NO_SCAN")
+    }
+  }
+
+  test("SPARK-55978: join pushdown is skipped when a side has a pushed sample") {
+    val joinSampleCatalog = "testjoinsample"
+    registerCatalog(joinSampleCatalog, classOf[InMemoryTableWithJoinAndSampleCatalog])
+    val t1 = s"$joinSampleCatalog.ns.t1"
+    val t2 = s"$joinSampleCatalog.ns.t2"
+    sql(s"CREATE TABLE $t1 (id bigint, data string) USING _")
+    sql(s"CREATE TABLE $t2 (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      sql(s"INSERT INTO $t2 VALUES (2, 'x'), (3, 'y'), (4, 'z')")
+      withSQLConf(SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+        // Without sample: join should be pushed down
+        val dfNoSample = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.id = $t2.id")
+        checkJoinPushed(dfNoSample)
+
+        // With a SYSTEM sample (fraction < 1) on one side: join pushdown
+        // should be skipped because the merged scan builder would silently
+        // discard the sample.
+        val dfWithSample = sql(
+          s"SELECT * FROM $t1 TABLESAMPLE SYSTEM (50 PERCENT) " +
+          s"JOIN $t2 ON $t1.id = $t2.id")
+        checkJoinNotPushed(dfWithSample)
+        // The sample should still be pushed down though
+        checkSamplePushed(dfWithSample, pushed = true)
+      }
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $t1")
+      sql(s"DROP TABLE IF EXISTS $t2")
+    }
+  }
+
+  test("SPARK-55978: 100% SYSTEM sample does not block join pushdown") {
+    val joinSampleCatalog = "testjoinsample100"
+    registerCatalog(joinSampleCatalog, classOf[InMemoryTableWithJoinAndSampleCatalog])
+    val t1 = s"$joinSampleCatalog.ns.t1"
+    val t2 = s"$joinSampleCatalog.ns.t2"
+    sql(s"CREATE TABLE $t1 (id bigint, data string) USING _")
+    sql(s"CREATE TABLE $t2 (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      sql(s"INSERT INTO $t2 VALUES (2, 'x'), (3, 'y'), (4, 'z')")
+      withSQLConf(SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+        // At fraction = 1 the sample is a no-op on the result set, so
+        // dropping it inside the merged scan builder is safe. The guard
+        // in V2ScanRelationPushDown short-circuits and join pushdown
+        // proceeds.
+        val dfWithSample = sql(
+          s"SELECT * FROM $t1 TABLESAMPLE SYSTEM (100 PERCENT) " +
+          s"JOIN $t2 ON $t1.id = $t2.id")
+        checkJoinPushed(dfWithSample)
+      }
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $t1")
+      sql(s"DROP TABLE IF EXISTS $t2")
+    }
+  }
+
+  test("SPARK-55978: with-replacement sample blocks join pushdown even at fraction 1") {
+    val joinSampleCatalog = "testjoinsamplerepl"
+    registerCatalog(joinSampleCatalog, classOf[InMemoryTableWithJoinAndSampleCatalog])
+    val t1 = s"$joinSampleCatalog.ns.t1"
+    val t2 = s"$joinSampleCatalog.ns.t2"
+    sql(s"CREATE TABLE $t1 (id bigint, data string) USING _")
+    sql(s"CREATE TABLE $t2 (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      sql(s"INSERT INTO $t2 VALUES (2, 'x'), (3, 'y'), (4, 'z')")
+      withSQLConf(SQLConf.DATA_SOURCE_V2_JOIN_PUSHDOWN.key -> "true") {
+        // SQL TABLESAMPLE always sets withReplacement=false, so use the
+        // DataFrame API. Poisson sampling at fraction 1 still emits each
+        // input row 0, 1, 2, ... times, so the sample is not a no-op and
+        // join pushdown must remain blocked.
+        val df = spark.table(t1).sample(withReplacement = true, fraction = 1.0)
+          .join(spark.table(t2), "id")
+        checkJoinNotPushed(df)
+        checkSamplePushed(df, pushed = true)
+      }
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $t1")
+      sql(s"DROP TABLE IF EXISTS $t2")
+    }
+  }
+
+  test("SPARK-55978: legacy connector with only 4-arg pushTableSample - BERNOULLI pushes down") {
+    val legacyCatalog = "testlegacysample"
+    registerCatalog(legacyCatalog, classOf[InMemoryTableWithLegacyTableSampleCatalog])
+    val tableName = s"$legacyCatalog.ns.legacy_tbl"
+    sql(s"CREATE TABLE $tableName (id bigint, data string) USING _")
+    try {
+      sql(s"INSERT INTO $tableName VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      // BERNOULLI should push down via the default 5-arg method delegating to 4-arg
+      val dfBernoulli = sql(s"SELECT * FROM $tableName TABLESAMPLE (50 PERCENT)")
+      checkSamplePushed(dfBernoulli, pushed = true)
+
+      // SYSTEM should fail because the default 5-arg method returns false for SYSTEM,
+      // and SYSTEM requires successful pushdown.
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(s"SELECT * FROM $tableName TABLESAMPLE SYSTEM (50 PERCENT)").collect()
+        },
+        condition = "UNSUPPORTED_FEATURE.TABLESAMPLE_SYSTEM")
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $tableName")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuiteBase.scala
index 2682487e51ba0..b894d5d75b3c8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuiteBase.scala
@@ -17,12 +17,16 @@
 
 package org.apache.spark.sql.connector
 
-import org.apache.spark.sql.Row
+import org.apache.spark.internal.config
+import org.apache.spark.sql.{AnalysisException, Row}
 import org.apache.spark.sql.catalyst.expressions.CheckInvariant
 import org.apache.spark.sql.catalyst.plans.logical.Filter
+import org.apache.spark.sql.connector.catalog.{Aborted, Committed}
 import org.apache.spark.sql.connector.catalog.InMemoryTable
 import org.apache.spark.sql.connector.write.DeleteSummary
 import org.apache.spark.sql.execution.datasources.v2.{DeleteFromTableExec, ReplaceDataExec, WriteDeltaExec}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.sources
 
 abstract class DeleteFromTableSuiteBase extends RowLevelOperationSuiteBase {
 
@@ -179,6 +183,66 @@ abstract class DeleteFromTableSuiteBase extends RowLevelOperationSuiteBase {
     checkDeleteMetrics(numDeletedRows = 0, numCopiedRows = 0)
   }
 
+  test("delete with literal false condition") {
+    createAndInitTable("pk INT NOT NULL, id INT, dep STRING",
+      """{ "pk": 1, "id": 1, "dep": "hr" }
+        |{ "pk": 2, "id": 2, "dep": "software" }
+        |{ "pk": 3, "id": 3, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"DELETE FROM $tableNameAsString WHERE false")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Row(1, 1, "hr") :: Row(2, 2, "software") :: Row(3, 3, "hr") :: Nil)
+
+    checkDeleteMetrics(numDeletedRows = 0, numCopiedRows = 0)
+  }
+
+  test("delete with literal true condition") {
+    createAndInitTable("pk INT NOT NULL, id INT, dep STRING",
+      """{ "pk": 1, "id": 1, "dep": "hr" }
+        |{ "pk": 2, "id": 2, "dep": "software" }
+        |{ "pk": 3, "id": 3, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"DELETE FROM $tableNameAsString WHERE true")
+
+    checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil)
+  }
+
+  test("delete with NULL equality on VOID column") {
+    createAndInitTable("pk INT NOT NULL, v VOID, dep STRING",
+      """{ "pk": 1, "v": null, "dep": "hr" }
+        |{ "pk": 2, "v": null, "dep": "software" }
+        |{ "pk": 3, "v": null, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"DELETE FROM $tableNameAsString WHERE v = NULL")
+
+    checkAnswer(
+      sql(s"SELECT pk, dep FROM $tableNameAsString"),
+      Row(1, "hr") :: Row(2, "software") :: Row(3, "hr") :: Nil)
+
+    checkDeleteMetrics(numDeletedRows = 0, numCopiedRows = 0)
+  }
+
+  test("delete with NULL condition on non-null column") {
+    createAndInitTable("pk INT NOT NULL, id INT, dep STRING",
+      """{ "pk": 1, "id": 1, "dep": "hr" }
+        |{ "pk": 2, "id": 2, "dep": "software" }
+        |{ "pk": 3, "id": 3, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"DELETE FROM $tableNameAsString WHERE pk = NULL")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Row(1, 1, "hr") :: Row(2, 2, "software") :: Row(3, 3, "hr") :: Nil)
+
+    checkDeleteMetrics(numDeletedRows = 0, numCopiedRows = 0)
+  }
+
   test("delete with basic filters") {
     createAndInitTable("pk INT NOT NULL, id INT, dep STRING",
       """{ "pk": 1, "id": 1, "dep": "hr" }
@@ -363,6 +427,46 @@ abstract class DeleteFromTableSuiteBase extends RowLevelOperationSuiteBase {
     }
   }
 
+  test("metric values are stable across stage retries") {
+    // Force a shuffle in the DELETE plan via an IN-subquery (with broadcast disabled), then
+    // have the DAGScheduler corrupt the first attempt of every upstream shuffle map stage.
+    // The scan-side numOutputRows doubles up across attempts, and the driver-side derivation
+    // numDeletedRows = numScannedRows - numCopiedRows in `ReplaceDataExec.getWriteSummary`
+    // propagates that doubling into `DeleteSummary`. With SQLLastAttemptMetric on the scan,
+    // the surfaced numDeletedRows stays correct. (The current fetch-failure injection does
+    // not retry the writer stage, so writer-side numCopiedRows isn't actually exercised
+    // here; follow-up #55738 will fill that gap.)
+    withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      withTempView("source") {
+        createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+          """{ "pk": 1, "salary": 100, "dep": "hr" }
+            |{ "pk": 2, "salary": 200, "dep": "software" }
+            |{ "pk": 3, "salary": 300, "dep": "hr" }
+            |{ "pk": 4, "salary": 400, "dep": "software" }
+            |""".stripMargin)
+
+        val sourceDF = Seq(1, 2).toDF("pk")
+        sourceDF.createOrReplaceTempView("source")
+
+        withSparkContextConf(
+            config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> "true") {
+          sql(
+            s"""DELETE FROM $tableNameAsString
+               |WHERE pk IN (SELECT pk FROM source)
+               |""".stripMargin)
+        }
+
+        checkDeleteMetrics(numDeletedRows = 2, numCopiedRows = 2)
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(
+            Row(3, 300, "hr"),
+            Row(4, 400, "software")))
+      }
+    }
+  }
+
   test("delete with NOT IN subqueries") {
     withTempView("deleted_id", "deleted_dep") {
       createAndInitTable("pk INT NOT NULL, id INT, dep STRING",
@@ -713,6 +817,213 @@ abstract class DeleteFromTableSuiteBase extends RowLevelOperationSuiteBase {
     }
   }
 
+  test("delete with analysis failure and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    val exception = intercept[AnalysisException] {
+      sql(s"DELETE FROM $tableNameAsString WHERE invalid_column = 1")
+    }
+
+    assert(exception.getMessage.contains("invalid_column"))
+    assert(catalog.lastTransaction.currentState == Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("delete with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // simple predicate delete: goes through SupportsDelete.deleteWhere (no Spark-side scan)
+    val (txn, _) = executeTransaction {
+      sql(s"DELETE FROM $tableNameAsString WHERE dep = 'hr'")
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(table.version() == "2")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(Row(2, 200, "software")))
+  }
+
+  test("delete with subquery on source table and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""DELETE FROM $tableNameAsString
+           |WHERE pk IN (SELECT pk FROM $sourceNameAsString WHERE dep = 'hr')
+           |""".stripMargin)
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 2)
+    assert(table.version() == "2")
+
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    val expectedNumSourceScans = if (deltaDelete) 1 else 2
+    assert(sourceTxnTable.scanEvents.size == expectedNumSourceScans)
+
+    val numSubquerySourceScans = sourceTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    }
+    assert(numSubquerySourceScans == expectedNumSourceScans)
+
+    val targetTxnTable = txnTables(tableNameAsString)
+    val expectedNumTargetScans = if (deltaDelete) 1 else 2
+    assert(targetTxnTable.scanEvents.size == expectedNumTargetScans)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(2, 200, "software"), // unchanged
+        Row(3, 300, "hr")))      // unchanged (pk 3 not in subquery result)
+  }
+
+  test("delete with CTE and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""WITH cte AS (
+           |  SELECT pk FROM $sourceNameAsString WHERE dep = 'hr'
+           |)
+           |DELETE FROM $tableNameAsString
+           |WHERE pk IN (SELECT pk FROM cte)
+           |""".stripMargin)
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 2)
+    assert(table.version() == "2")
+
+    val targetTxnTable = txnTables(tableNameAsString)
+    val expectedNumTargetScans = if (deltaDelete) 1 else 2
+    assert(targetTxnTable.scanEvents.size == expectedNumTargetScans)
+
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    val expectedNumSourceScans = if (deltaDelete) 1 else 2
+    assert(sourceTxnTable.scanEvents.size == expectedNumSourceScans)
+
+    val numCteSourceScans = sourceTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    }
+    assert(numCteSourceScans == expectedNumSourceScans)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(2, 200, "software"), // unchanged
+        Row(3, 300, "hr")))      // unchanged (pk 3 not in source)
+  }
+
+  test("delete using view with transactional checks") {
+    withView("temp_view") {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |""".stripMargin)
+
+      sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+      sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+      sql(
+        s"""CREATE VIEW temp_view AS
+           |SELECT pk FROM $sourceNameAsString WHERE dep = 'hr'
+           |""".stripMargin)
+
+      val (txn, txnTables) = executeTransaction {
+        sql(s"DELETE FROM $tableNameAsString WHERE pk IN (SELECT pk FROM temp_view)")
+      }
+
+      assert(txn.currentState == Committed)
+      assert(txn.isClosed)
+      assert(txnTables.size == 2)
+      assert(table.version() == "2")
+
+      val targetTxnTable = txnTables(tableNameAsString)
+      val expectedNumTargetScans = if (deltaDelete) 1 else 2
+      assert(targetTxnTable.scanEvents.size == expectedNumTargetScans)
+
+      val sourceTxnTable = txnTables(sourceNameAsString)
+      val expectedNumSourceScans = if (deltaDelete) 1 else 2
+      assert(sourceTxnTable.scanEvents.size == expectedNumSourceScans)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(2, 200, "software"), // unchanged
+          Row(3, 300, "hr")))      // unchanged (pk 3 not in source)
+    }
+  }
+
+  test("EXPLAIN DELETE SQL with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    sql(s"EXPLAIN DELETE FROM $tableNameAsString WHERE dep = 'hr'")
+
+    // EXPLAIN should not start a new transaction
+    assert(catalog.transaction === null)
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software")))
+  }
+
+  test("delete with NOT IN over empty subquery") {
+    withTempView("empty_subq") {
+      createAndInitTable("pk INT NOT NULL, id INT NOT NULL, dep STRING",
+        """{ "pk": 1, "id": 1, "dep": "hr" }
+          |{ "pk": 2, "id": 2, "dep": "hr" }
+          |{ "pk": 3, "id": 3, "dep": "hr" }
+          |""".stripMargin)
+
+      Seq.empty[Int].toDF("v").createOrReplaceTempView("empty_subq")
+
+      sql(
+        s"""DELETE FROM $tableNameAsString
+           |WHERE id NOT IN (SELECT v FROM empty_subq)
+           |""".stripMargin)
+
+      checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil)
+      // The filter gets replaced by an EmptyRelation in the ReplaceData executed plan, which hides
+      // the executed BatchScan and prevents computing numDeletedRows using numOutputRows of the
+      // scan node.
+      checkDeleteMetrics(numDeletedRows = if (deltaDelete) 3 else -1, numCopiedRows = 0)
+    }
+  }
+
   private def executeDeleteWithFilters(query: String): Unit = {
     val executedPlan = executeAndKeepPlan {
       sql(query)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedMergeIntoTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedMergeIntoTableSuite.scala
index bb4f47fbf63e4..53073628f2744 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedMergeIntoTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedMergeIntoTableSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.connector
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.expressions.Exists
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
 
@@ -37,6 +38,177 @@ class DeltaBasedMergeIntoTableSuite extends DeltaBasedMergeIntoTableSuiteBase {
     props
   }
 
+  test("merge runtime filtering is disabled with NOT MATCHED BY SOURCE clauses") {
+    withTempView("source") {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "hr" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |{ "pk": 4, "salary": 400, "dep": "software" }
+          |{ "pk": 5, "salary": 500, "dep": "software" }
+          |""".stripMargin)
+
+      val sourceDF = Seq(1, 2, 3, 6).toDF("pk")
+      sourceDF.createOrReplaceTempView("source")
+
+      executeAndCheckScans(
+        s"""MERGE INTO $tableNameAsString t
+           |USING source s
+           |ON t.pk = s.pk
+           |WHEN MATCHED THEN
+           | UPDATE SET t.salary = t.salary + 1
+           |WHEN NOT MATCHED THEN
+           | INSERT (pk, salary, dep) VALUES (s.pk, 0, 'hr')
+           |WHEN NOT MATCHED BY SOURCE THEN
+           | DELETE
+           |""".stripMargin,
+        primaryScanSchema = "pk INT, salary INT, dep STRING, _partition STRING",
+        groupFilterScanSchema = None)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, 101, "hr"), // update
+          Row(2, 201, "hr"), // update
+          Row(3, 301, "hr"), // update
+          Row(6, 0, "hr"))) // insert
+    }
+  }
+
+  test("merge runtime group filtering (DPP enabled)") {
+    withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+      checkMergeRuntimeGroupFiltering()
+    }
+  }
+
+  test("merge runtime group filtering (DPP disabled)") {
+    withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "false") {
+      checkMergeRuntimeGroupFiltering()
+    }
+  }
+
+  test("merge runtime group filtering (AQE enabled)") {
+    withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") {
+      checkMergeRuntimeGroupFiltering()
+    }
+  }
+
+  test("merge runtime group filtering (AQE disabled)") {
+    withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      checkMergeRuntimeGroupFiltering()
+    }
+  }
+
+  private def checkMergeRuntimeGroupFiltering(): Unit = {
+    withTempView("source") {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "hr" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |{ "pk": 4, "salary": 400, "dep": "software" }
+          |{ "pk": 5, "salary": 500, "dep": "software" }
+          |""".stripMargin)
+
+      val sourceDF = Seq(1, 2, 3, 6).toDF("pk")
+      sourceDF.createOrReplaceTempView("source")
+
+      executeAndCheckScans(
+        s"""MERGE INTO $tableNameAsString t
+           |USING source s
+           |ON t.pk = s.pk
+           |WHEN MATCHED THEN
+           | UPDATE SET t.salary = t.salary + 1
+           |WHEN NOT MATCHED THEN
+           | INSERT (pk, salary, dep) VALUES (s.pk, 0, 'hr')
+           |""".stripMargin,
+        primaryScanSchema = "pk INT, salary INT, dep STRING, _partition STRING",
+        groupFilterScanSchema = Some("pk INT, dep STRING"))
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, 101, "hr"), // update
+          Row(2, 201, "hr"), // update
+          Row(3, 301, "hr"), // update
+          Row(4, 400, "software"), // unchanged
+          Row(5, 500, "software"), // unchanged
+          Row(6, 0, "hr"))) // insert
+    }
+  }
+
+  test("merge does not double plan table (group filter enabled)") {
+    withSQLConf(SQLConf.RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED.key -> "true") {
+      withTempView("source") {
+        createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+          """{ "pk": 1, "salary": 100, "dep": "hr" }
+            |{ "pk": 2, "salary": 200, "dep": "software" }
+            |{ "pk": 3, "salary": 300, "dep": "hr" }
+            |""".stripMargin)
+
+        sql(
+          s"""CREATE TEMP VIEW source AS
+             |SELECT pk, salary FROM $tableNameAsString WHERE salary > 150
+             |""".stripMargin)
+
+        val (_, groupFilterCond) = executeAndKeepConditions {
+          sql(
+            s"""MERGE INTO $tableNameAsString t
+               |USING source s
+               |ON t.pk = s.pk
+               |WHEN MATCHED THEN
+               | UPDATE SET t.salary = s.salary + 1
+               |WHEN NOT MATCHED THEN
+               | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'new')
+               |""".stripMargin)
+        }
+
+        groupFilterCond match {
+          case Some(p: Exists) => assertNoScanPlanning(p.plan)
+          case _ => fail(s"unexpected group filter: $groupFilterCond")
+        }
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(Row(1, 100, "hr"), Row(2, 201, "software"), Row(3, 301, "hr")))
+      }
+    }
+  }
+
+  test("merge does not double plan table (group filter disabled)") {
+    withSQLConf(SQLConf.RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED.key -> "false") {
+      withTempView("source") {
+        createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+          """{ "pk": 1, "salary": 100, "dep": "hr" }
+            |{ "pk": 2, "salary": 200, "dep": "software" }
+            |{ "pk": 3, "salary": 300, "dep": "hr" }
+            |""".stripMargin)
+
+        sql(
+          s"""CREATE TEMP VIEW source AS
+             |SELECT pk, salary FROM $tableNameAsString WHERE salary > 150
+             |""".stripMargin)
+
+        val (_, groupFilterCond) = executeAndKeepConditions {
+          sql(
+            s"""MERGE INTO $tableNameAsString t
+               |USING source s
+               |ON t.pk = s.pk
+               |WHEN MATCHED THEN
+               | UPDATE SET t.salary = s.salary + 1
+               |WHEN NOT MATCHED THEN
+               | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'new')
+               |""".stripMargin)
+        }
+
+        assert(groupFilterCond.isEmpty, "group filter must be disabled")
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(Row(1, 100, "hr"), Row(2, 201, "software"), Row(3, 301, "hr")))
+      }
+    }
+  }
+
   test("merge handles metadata columns correctly") {
     withTempView("source") {
       createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuite.scala
index 813e8779f5f91..aa02ab1189487 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.connector
 
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.InSubquery
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
 
 class DeltaBasedUpdateTableSuite extends DeltaBasedUpdateTableSuiteBase {
@@ -93,6 +94,54 @@ class DeltaBasedUpdateTableSuite extends DeltaBasedUpdateTableSuiteBase {
     }
   }
 
+  test("update runtime group filtering (DPP enabled)") {
+    withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+      checkUpdateRuntimeGroupFiltering()
+    }
+  }
+
+  test("update runtime group filtering (DPP disabled)") {
+    withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "false") {
+      checkUpdateRuntimeGroupFiltering()
+    }
+  }
+
+  test("update runtime group filtering (AQE enabled)") {
+    withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") {
+      checkUpdateRuntimeGroupFiltering()
+    }
+  }
+
+  test("update runtime group filtering (AQE disabled)") {
+    withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      checkUpdateRuntimeGroupFiltering()
+    }
+  }
+
+  private def checkUpdateRuntimeGroupFiltering(): Unit = {
+    withTable(tableNameAsString) {
+      withTempView("deleted_id") {
+        createAndInitTable("pk INT NOT NULL, id INT, salary INT, dep STRING",
+          """{ "pk": 1, "id": 1, "salary": 300, "dep": "hr" }
+            |{ "pk": 2, "id": 2, "salary": 150, "dep": "software" }
+            |{ "pk": 3, "id": 3, "salary": 120, "dep": "hr" }
+            |""".stripMargin)
+
+        val deletedIdDF = Seq(Some(1), None).toDF()
+        deletedIdDF.createOrReplaceTempView("deleted_id")
+
+        executeAndCheckScans(
+          s"UPDATE $tableNameAsString SET salary = -1 WHERE id IN (SELECT * FROM deleted_id)",
+          primaryScanSchema = "pk INT, id INT, dep STRING, _partition STRING",
+          groupFilterScanSchema = Some("id INT, dep STRING"))
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Row(1, 1, -1, "hr") :: Row(2, 2, 150, "software") :: Row(3, 3, 120, "hr") :: Nil)
+      }
+    }
+  }
+
   test("update does not double plan table") {
     createAndInitTable("pk INT NOT NULL, id INT, salary INT, dep STRING",
       """{ "pk": 1, "id": 1, "salary": 300, "dep": 'hr' }
@@ -112,7 +161,10 @@ class DeltaBasedUpdateTableSuite extends DeltaBasedUpdateTableSuiteBase {
       case _ => fail(s"unexpected condition: $cond")
     }
 
-    assert(groupFilterCond.isEmpty, "delta operations must not have group filter")
+    groupFilterCond match {
+      case Some(InSubquery(_, query)) => assertNoScanPlanning(query.plan)
+      case _ => fail(s"unexpected group filter: $groupFilterCond")
+    }
 
     checkAnswer(
       sql(s"SELECT * FROM $tableNameAsString"),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuiteBase.scala
index c2db54f8f724b..49e586535a0d0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeltaBasedUpdateTableSuiteBase.scala
@@ -67,7 +67,8 @@ abstract class DeltaBasedUpdateTableSuiteBase extends UpdateTableSuiteBase {
         sql(s"UPDATE $tableNameAsString SET dep = 'invalid' WHERE id <= 1 AND rand() > 0.5")
       },
       condition = "INVALID_NON_DETERMINISTIC_EXPRESSIONS",
-      parameters = Map("sqlExprs" -> "\"((id <= 1) AND (rand() > 0.5))\""),
+      parameters = Map(
+        "sqlExprs" -> "\"((id <= 1) AND (rand() > 0.5))\", \"((id <= 1) AND (rand() > 0.5))\""),
       context = ExpectedContext(
         fragment = "UPDATE cat.ns1.test_table SET dep = 'invalid' WHERE id <= 1 AND rand() > 0.5",
         start = 0,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala
index fa6edc96ec9fd..42017c2dd60eb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala
@@ -24,6 +24,9 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.util.TypeUtils._
 import org.apache.spark.sql.catalyst.util.quoteIdentifier
+import org.apache.spark.sql.connector.catalog.InMemoryBaseTable
+import org.apache.spark.sql.connector.write.InsertSummary
+import org.apache.spark.sql.execution.datasources.v2.ExtractV2Table
 import org.apache.spark.sql.functions.{array, lit, map, struct}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode}
@@ -60,6 +63,7 @@ abstract class InsertIntoTests(
     sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
     val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
     doInsert(t1, df)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df)
   }
 
@@ -70,6 +74,7 @@ abstract class InsertIntoTests(
     val dfr = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("data", "id")
 
     doInsert(t1, dfr)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     verifyTable(t1, df)
   }
 
@@ -79,6 +84,7 @@ abstract class InsertIntoTests(
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
       val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
       doInsert(t1, df)
+      checkInsertMetrics(t1, numInsertedRows = 3)
       verifyTable(t1, df)
     }
   }
@@ -89,6 +95,7 @@ abstract class InsertIntoTests(
     val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
     val df2 = Seq((4L, "d"), (5L, "e"), (6L, "f")).toDF("id", "data")
     doInsert(t1, df)
+    checkInsertMetrics(t1, numInsertedRows = 3)
     doInsert(t1, df2, SaveMode.Overwrite)
     verifyTable(t1, df2)
   }
@@ -99,6 +106,7 @@ abstract class InsertIntoTests(
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
       val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data")
       doInsert(t1, init)
+      checkInsertMetrics(t1, numInsertedRows = 2)
 
       val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data")
       doInsert(t1, df, SaveMode.Overwrite)
@@ -114,6 +122,7 @@ abstract class InsertIntoTests(
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         val init = Seq((2L, "dummy"), (4L, "keep")).toDF("id", "data")
         doInsert(t1, init)
+        checkInsertMetrics(t1, numInsertedRows = 2)
 
         val dfr = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("data", "id")
         doInsert(t1, dfr, SaveMode.Overwrite)
@@ -210,6 +219,15 @@ trait InsertIntoSQLOnlyTests
   /** Check that the results in `tableName` match the `expected` DataFrame. */
   protected def verifyTable(tableName: String, expected: DataFrame): Unit
 
+  protected def checkInsertMetrics(tableName: String, numInsertedRows: Long): Unit = {
+    val inMemoryTable = spark.table(tableName).queryExecution.analyzed.collectFirst {
+      case ExtractV2Table(t) => t.asInstanceOf[InMemoryBaseTable]
+    }.get
+    val summary = inMemoryTable.commits.last.writeSummary.get.asInstanceOf[InsertSummary]
+    assert(summary.numInsertedRows() === numInsertedRows,
+      s"Expected numInsertedRows=$numInsertedRows, got ${summary.numInsertedRows()}")
+  }
+
   protected val v2Format: String
   protected val catalogAndNamespace: String
 
@@ -293,6 +311,7 @@ trait InsertIntoSQLOnlyTests
       withTableAndData(t1) { view =>
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         sql(s"INSERT INTO $t1 PARTITION (id = 23) SELECT data FROM $view")
+        checkInsertMetrics(t1, numInsertedRows = 3)
         verifyTable(t1, sql(s"SELECT 23, data FROM $view"))
       }
     }
@@ -303,6 +322,7 @@ trait InsertIntoSQLOnlyTests
         withTableAndData(t1) { view =>
           sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
           sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'also-deleted')")
+          checkInsertMetrics(t1, numInsertedRows = 2)
           sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id) SELECT * FROM $view")
           verifyTable(t1, Seq(
             (1, "a"),
@@ -317,6 +337,7 @@ trait InsertIntoSQLOnlyTests
       withTableAndData(t1) { view =>
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'keep')")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id) SELECT * FROM $view")
         verifyTable(t1, Seq(
           (1, "a"),
@@ -332,6 +353,7 @@ trait InsertIntoSQLOnlyTests
         withTableAndData(t1) { view =>
           sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
           sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'also-deleted')")
+          checkInsertMetrics(t1, numInsertedRows = 2)
           sql(s"INSERT OVERWRITE TABLE $t1 SELECT * FROM $view")
           verifyTable(t1, Seq(
             (1, "a"),
@@ -346,6 +368,7 @@ trait InsertIntoSQLOnlyTests
       withTableAndData(t1) { view =>
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy'), (4L, 'keep')")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 SELECT * FROM $view")
         verifyTable(t1, Seq(
           (1, "a"),
@@ -361,6 +384,7 @@ trait InsertIntoSQLOnlyTests
         sql(s"CREATE TABLE $t1 (id bigint, data string, p1 int) " +
           s"USING $v2Format PARTITIONED BY (p1)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 23), (4L, 'keep', 2)")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p1 = 23) SELECT * FROM $view")
         verifyTable(t1, Seq(
           (1, "a", 23),
@@ -377,6 +401,7 @@ trait InsertIntoSQLOnlyTests
           sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
             s"USING $v2Format PARTITIONED BY (id, p)")
           sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'also-deleted', 2)")
+          checkInsertMetrics(t1, numInsertedRows = 2)
           sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id, p = 2) SELECT * FROM $view")
           verifyTable(t1, Seq(
             (1, "a", 2),
@@ -393,6 +418,7 @@ trait InsertIntoSQLOnlyTests
           sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
             s"USING $v2Format PARTITIONED BY (id, p)")
           sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'also-deleted', 2)")
+          checkInsertMetrics(t1, numInsertedRows = 2)
           sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2, id) SELECT * FROM $view")
           verifyTable(t1, Seq(
             (1, "a", 2),
@@ -409,6 +435,7 @@ trait InsertIntoSQLOnlyTests
           sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
             s"USING $v2Format PARTITIONED BY (id, p)")
           sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'also-deleted', 2)")
+          checkInsertMetrics(t1, numInsertedRows = 2)
           sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2) SELECT * FROM $view")
           verifyTable(t1, Seq(
             (1, "a", 2),
@@ -424,6 +451,7 @@ trait InsertIntoSQLOnlyTests
         sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
           s"USING $v2Format PARTITIONED BY (id, p)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2, id) SELECT * FROM $view")
         verifyTable(t1, Seq(
           (1, "a", 2),
@@ -439,6 +467,7 @@ trait InsertIntoSQLOnlyTests
         sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
           s"USING $v2Format PARTITIONED BY (id, p)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id, p = 2) SELECT * FROM $view")
         verifyTable(t1, Seq(
           (1, "a", 2),
@@ -454,6 +483,7 @@ trait InsertIntoSQLOnlyTests
         sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
           s"USING $v2Format PARTITIONED BY (id, p)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (p = 2) SELECT * FROM $view")
         verifyTable(t1, Seq(
           (1, "a", 2),
@@ -469,6 +499,7 @@ trait InsertIntoSQLOnlyTests
         sql(s"CREATE TABLE $t1 (id bigint, data string, p int) " +
           s"USING $v2Format PARTITIONED BY (id, p)")
         sql(s"INSERT INTO $t1 VALUES (2L, 'dummy', 2), (4L, 'keep', 2)")
+        checkInsertMetrics(t1, numInsertedRows = 2)
         sql(s"INSERT OVERWRITE TABLE $t1 PARTITION (id = 2, p = 2) SELECT data FROM $view")
         verifyTable(t1, Seq(
           (2, "a", 2),
@@ -491,6 +522,7 @@ trait InsertIntoSQLOnlyTests
         df.where("true").take(5)
         df.where("true").tail(5)
 
+        checkInsertMetrics(t1, numInsertedRows = 3)
         verifyTable(t1, spark.table(view))
       }
     }
@@ -510,9 +542,11 @@ trait InsertIntoSQLOnlyTests
       withTable(t1) {
         sql(s"CREATE TABLE $t1 (c1 INT DEFAULT 42, c2 STRING DEFAULT 'hello') USING $v2Format")
         sql(s"INSERT INTO $t1 VALUES (1, DEFAULT)")
+        checkInsertMetrics(t1, numInsertedRows = 1)
         checkAnswer(sql(s"SELECT * FROM $t1"), Row(1, "hello"))
 
         sql(s"INSERT INTO $t1 VALUES (DEFAULT, DEFAULT)")
+        checkInsertMetrics(t1, numInsertedRows = 1)
         checkAnswer(
           sql(s"SELECT * FROM $t1 ORDER BY c1"),
           Seq(Row(1, "hello"), Row(42, "hello")))
@@ -540,6 +574,26 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       byName: Boolean = false,
       replaceWhere: Option[String] = None): Unit
 
+  /** Insert data into a table by name without schema evolution. */
+  protected def doInsertByName(
+      tableName: String,
+      insert: DataFrame,
+      mode: SaveMode = SaveMode.Append): Unit = {
+    val tmpView = "tmp_view"
+    withTempView(tmpView) {
+      insert.createOrReplaceTempView(tmpView)
+      val overwrite = if (mode == SaveMode.Overwrite) "OVERWRITE" else "INTO"
+      sql(s"INSERT $overwrite TABLE $tableName BY NAME SELECT * FROM $tmpView")
+    }
+  }
+
+  /** Run a block with INSERT nested type coercion enabled. */
+  protected def withInsertNestedTypeCoercion(f: => Unit): Unit = {
+    withSQLConf(SQLConf.INSERT_INTO_NESTED_TYPE_COERCION_ENABLED.key -> "true") {
+      f
+    }
+  }
+
   test("Insert schema evolution: extra column - no auto-schema-evolution capability") {
     val t1 = s"${catalogAndNamespace}tbl"
     withTable(t1) {
@@ -565,8 +619,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " +
         s"TBLPROPERTIES ('auto-schema-evolution' = 'false')")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // Same column count, no evolution needed: should succeed even without capability.
       doInsertWithSchemaEvolution(t1, Seq((2L, "b")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
     }
   }
@@ -576,7 +632,9 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1, Seq((2L, "b")).toDF("x", "y"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // No evolution
       verifyTable(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
     }
@@ -587,8 +645,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "b", true)).toDF("id", "data", "active"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq[(Long, String, java.lang.Boolean)](
         (1L, "a", null),
         (2L, "b", true)
@@ -601,8 +661,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "b", true, 100L)).toDF("id", "data", "active", "score"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq[(Long, String, java.lang.Boolean, java.lang.Long)](
         (1L, "a", null, null),
         (2L, "b", true, 100L)
@@ -615,7 +677,9 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1, Seq((2L, "b", true)).toDF("x", "y", "z"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq[(Long, String, java.lang.Boolean)](
         (1L, "a", null),
         (2L, "b", true)
@@ -629,6 +693,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsertWithSchemaEvolution(t1,
         Seq((1L, "a", true)).toDF("id", "data", "active"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq(
         (1L, "a", true)
       ).toDF("id", "data", "active"))
@@ -642,9 +707,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1, "Alice")).toDF("id", "name")
           .select($"id", struct($"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2, "Bob", 30)).toDF("id", "name", "age")
           .select($"id", struct($"name", $"age").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1, Row("Alice", null)), Row(2, Row("Bob", 30))))
@@ -658,9 +725,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1, "Alice")).toDF("id", "name")
           .select($"id", struct($"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2, "Bob", 30)).toDF("id", "firstName", "age")
           .select($"id", struct($"firstName", $"age").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1, Row("Alice", null)), Row(2, Row("Bob", 30))))
@@ -672,8 +741,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq(("b", true, 2L)).toDF("data", "active", "id"), byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq[(Long, String, java.lang.Boolean)](
         (1L, "a", null),
         (2L, "b", true)
@@ -688,10 +759,12 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1, "Alice")).toDF("id", "name")
           .select($"id", struct($"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2, 30, "Bob")).toDF("id", "age", "name")
           .select($"id", struct($"age", $"name").as("info")),
         byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1, Row("Alice", null)), Row(2, Row("Bob", 30))))
@@ -705,10 +778,12 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1, "Alice")).toDF("id", "name")
           .select($"id", struct($"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2, 30, "Bob", "NYC")).toDF("id", "age", "name", "city")
           .select($"id", struct($"age", $"name", $"city").as("info")),
         byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1, Row("Alice", null, null)), Row(2, Row("Bob", 30, "NYC"))))
@@ -720,8 +795,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq(("b", 2L)).toDF("data", "id"), byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // No evolution
       verifyTable(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
     }
@@ -732,8 +809,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq(("b", 2L)).toDF("x", "y"), byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq[(java.lang.Long, String, String, java.lang.Long)](
         (1L, "a", null, null),
         (null, null, "b", 2L)
@@ -748,6 +827,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1, "Alice")).toDF("id", "name")
           .select($"id", struct($"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2, 30, "Bob")).toDF("id", "age", "name")
           .select($"id", struct($"age", $"name").as("info")),
@@ -764,6 +844,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
       doInsert(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 2)
       // REPLACE WHERE only deletes rows matching the predicate, then inserts new data.
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "x", true), (4L, "y", false)).toDF("id", "data", "active"),
@@ -781,6 +862,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
       doInsert(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 2)
       doInsertWithSchemaEvolution(t1,
         Seq((true, "x", 2L), (false, "y", 4L)).toDF("active", "data", "id"),
         byName = true,
@@ -801,6 +883,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       val initDf = Seq((1L, "Alice"), (2L, "Bob")).toDF("id", "name")
         .select($"id", struct($"name").as("info"))
       doInsert(t1, initDf)
+      checkInsertMetrics(t1, numInsertedRows = 2)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bobby", 25)).toDF("id", "name", "age")
           .select($"id", struct($"name", $"age").as("info")),
@@ -820,6 +903,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       val initDf = Seq((1L, "Alice"), (2L, "Bob")).toDF("id", "name")
         .select($"id", struct($"name").as("info"))
       doInsert(t1, initDf)
+      checkInsertMetrics(t1, numInsertedRows = 2)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bobby", 25)).toDF("id", "name", "age")
           .select($"id", struct($"age", $"name").as("info")),
@@ -853,6 +937,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       withTable(t1) {
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         doInsert(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
+        checkInsertMetrics(t1, numInsertedRows = 2)
         // Overwrite with schema evolution adding a new column, dynamic mode should only replace
         // partitions present in the inserted data.
         doInsertWithSchemaEvolution(t1,
@@ -874,6 +959,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       withTable(t1) {
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         doInsert(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
+        checkInsertMetrics(t1, numInsertedRows = 2)
         doInsertWithSchemaEvolution(t1,
           Seq((true, "x", 2L), (false, "y", 3L)).toDF("active", "data", "id"),
           mode = SaveMode.Overwrite,
@@ -894,6 +980,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       withTable(t1) {
         sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format PARTITIONED BY (id)")
         doInsert(t1, Seq((1L, "a"), (2L, "b")).toDF("id", "data"))
+        checkInsertMetrics(t1, numInsertedRows = 2)
         // Static mode overwrites the entire table.
         doInsertWithSchemaEvolution(t1,
           Seq((2L, "x", true), (3L, "y", false)).toDF("id", "data", "active"),
@@ -949,8 +1036,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       sql(s"CREATE TABLE $t1 (id bigint) USING $v2Format")
       doInsertWithSchemaEvolution(t1,
         Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "b", true)).toDF("id", "data", "active"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       verifyTable(t1, Seq[(Long, String, java.lang.Boolean)](
         (1L, "a", null),
         (2L, "b", true)
@@ -965,9 +1054,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice")).toDF("id", "name")
           .select($"id", struct(struct($"name").as("nested")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bob", 30)).toDF("id", "name", "age")
           .select($"id", struct(struct($"name", $"age").as("nested")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, Row(Row("Alice", null))), Row(2L, Row(Row("Bob", 30)))))
@@ -981,10 +1072,12 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice")).toDF("id", "name")
           .select($"id", struct(struct($"name").as("nested")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bob", 30)).toDF("id", "name", "age")
           .select($"id", struct(struct($"age", $"name").as("nested")).as("info")),
         byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, Row(Row("Alice", null))), Row(2L, Row(Row("Bob", 30)))))
@@ -998,9 +1091,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice")).toDF("id", "name")
           .select($"id", array(struct($"name")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bob", 30)).toDF("id", "name", "age")
           .select($"id", array(struct($"name", $"age")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(
@@ -1016,9 +1111,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "A", "Alice")).toDF("id", "key", "name")
           .select($"id", map($"key", struct($"name")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "B", "Bob", 30)).toDF("id", "key", "name", "age")
           .select($"id", map($"key", struct($"name", $"age")).as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(
@@ -1034,9 +1131,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice", "A")).toDF("id", "name", "value")
           .select($"id", map(struct($"name"), $"value").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bob", 30, "B")).toDF("id", "name", "age", "value")
           .select($"id", map(struct($"name", $"age"), $"value").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(
@@ -1050,8 +1149,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id int, data string) USING $v2Format")
       doInsert(t1, Seq((1, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((Long.MaxValue, "b")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, "a"), Row(Long.MaxValue, "b")))
@@ -1064,8 +1165,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id int, data string) USING $v2Format")
       doInsert(t1, Seq((1, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq(("b", Long.MaxValue)).toDF("data", "id"), byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, "a"), Row(Long.MaxValue, "b")))
@@ -1078,8 +1181,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id int, data string) USING $v2Format")
       doInsert(t1, Seq((1, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((Long.MaxValue, "b", true)).toDF("id", "data", "active"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(
@@ -1098,9 +1203,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice", 100)).toDF("id", "name", "value")
           .select($"id", struct($"value", $"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bob", Long.MaxValue)).toDF("id", "name", "value")
           .select($"id", struct($"value", $"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT id, info.value, info.name FROM $t1"),
         Seq(Row(1L, 100L, "Alice"), Row(2L, Long.MaxValue, "Bob")))
@@ -1116,10 +1223,12 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice", 100)).toDF("id", "name", "value")
           .select($"id", struct($"value", $"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "Bob", Long.MaxValue)).toDF("id", "name", "value")
           .select($"id", struct($"name", $"value").as("info")),
         byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT id, info.value, info.name FROM $t1"),
         Seq(Row(1L, 100L, "Alice"), Row(2L, Long.MaxValue, "Bob")))
@@ -1135,9 +1244,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, 100)).toDF("id", "value")
           .select($"id", array(struct($"value")).as("arr")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, Long.MaxValue)).toDF("id", "value")
           .select($"id", array(struct($"value")).as("arr")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT id, arr[0].value FROM $t1"),
         Seq(Row(1L, 100L), Row(2L, Long.MaxValue)))
@@ -1154,9 +1265,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "k1", 100)).toDF("id", "key", "value")
           .select($"id", map($"key", struct($"value")).as("m")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((2L, "k2", Long.MaxValue)).toDF("id", "key", "value")
           .select($"id", map($"key", struct($"value")).as("m")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT id, m['k1'].value, m['k2'].value FROM $t1"),
         Seq(Row(1L, 100L, null), Row(2L, null, Long.MaxValue)))
@@ -1171,6 +1284,7 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id int, data string) USING $v2Format")
       doInsert(t1, Seq((1, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq((Long.MaxValue, "b")).toDF("id", "data"), mode = SaveMode.Overwrite)
       checkAnswer(
@@ -1185,8 +1299,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // Inserting an int into a long column should not narrow the schema.
       doInsertWithSchemaEvolution(t1, Seq((2, "b")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, "a"), Row(2L, "b")))
@@ -1200,9 +1316,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id float, data string) USING $v2Format")
       doInsert(t1, Seq((1f, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // Inserting a double into a float should widen the schema, inserting an int into a string
       // should retain the string type.
       doInsertWithSchemaEvolution(t1, Seq((2d, 3)).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1d, "a"), Row(2d, "3")))
@@ -1239,9 +1357,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // Insert a null value with NullType - should not change the target column type.
       doInsertWithSchemaEvolution(t1,
         Seq(2L).toDF("id").withColumn("data", lit(null)))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, "a"), Row(2L, null)))
@@ -1254,9 +1374,11 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format")
       doInsert(t1, Seq((1L, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       doInsertWithSchemaEvolution(t1,
         Seq(2L).toDF("id").withColumn("data", lit(null)),
         byName = true)
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1L, "a"), Row(2L, null)))
@@ -1271,10 +1393,12 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       doInsert(t1,
         Seq((1L, "Alice", 100)).toDF("id", "name", "value")
           .select($"id", struct($"value", $"name").as("info")))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // Insert with NullType for nested field - should not change the struct field type.
       doInsertWithSchemaEvolution(t1,
         Seq(2L).toDF("id")
           .withColumn("info", struct(lit(null).as("value"), lit("Bob").as("name"))))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT id, info.value, info.name FROM $t1"),
         Seq(Row(1L, 100, "Alice"), Row(2L, null, "Bob")))
@@ -1288,8 +1412,10 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
     withTable(t1) {
       sql(s"CREATE TABLE $t1 (id int, data string) USING $v2Format")
       doInsert(t1, Seq((1, "a")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       // Insert without schema evolution - should cast to target type, not widen.
       doInsert(t1, Seq((2L, "b")).toDF("id", "data"))
+      checkInsertMetrics(t1, numInsertedRows = 1)
       checkAnswer(
         sql(s"SELECT * FROM $t1"),
         Seq(Row(1, "a"), Row(2, "b")))
@@ -1297,4 +1423,722 @@ trait InsertIntoSchemaEvolutionTests { this: InsertIntoTests =>
       assert(spark.table(t1).schema("id").dataType === IntegerType)
     }
   }
+
+  // ---------------------------------------------------------------------------
+  // Tests for source with fewer columns/fields than target
+  // ---------------------------------------------------------------------------
+
+  test("Insert schema evolution: source missing top-level column by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val schema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("salary", IntegerType),
+        StructField("dep", StringType)))
+      val data = Seq(Row(0, 100, "sales"))
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string) USING $v2Format")
+      doInsert(t1, spark.createDataFrame(spark.sparkContext.parallelize(data), schema))
+      doInsertWithSchemaEvolution(t1,
+        Seq((1, "engineering")).toDF("id", "dep"),
+        byName = true)
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, 100, "sales"), Row(1, null, "engineering")))
+    }
+  }
+
+  test("Insert schema evolution: source missing top-level column by position") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string) USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      // By position: source col 1 maps to target col 1, source col 2 maps to target col 2,
+      // trailing target col 3 is filled with null.
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1,
+          Seq((1, 200)).toDF("id", "salary"))
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, 100, "sales"), Row(1, 200, null)))
+    }
+  }
+
+  test("Insert schema evolution: source missing top-level column with DEFAULT by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int DEFAULT 200, dep string) USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      doInsertWithSchemaEvolution(t1,
+        Seq((1, "engineering")).toDF("id", "dep"),
+        byName = true)
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, 100, "sales"), Row(1, 200, "engineering")))
+    }
+  }
+
+  test("Insert schema evolution: source missing top-level column with DEFAULT by position") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string DEFAULT 'unknown') USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1,
+          Seq((1, 200)).toDF("id", "salary"))
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, 100, "sales"), Row(1, 200, "unknown")))
+    }
+  }
+
+  test("Insert schema evolution: source missing nested struct field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")))), sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, "a", true)), Row(1, Row(10, "b", null))))
+    }
+  }
+
+  test("Insert schema evolution: source missing nested struct field by position") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")))), sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, "a", true)), Row(1, Row(10, "b", null))))
+    }
+  }
+
+  test("Insert schema evolution: source missing field in struct nested in array by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("a", ArrayType(StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType)))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"a array<struct<c1:int,c2:string,c3:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Seq(Row(1, "a", true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("a", ArrayType(StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType)))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Seq(Row(10, "b"))))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Seq(Row(1, "a", true))), Row(1, Seq(Row(10, "b", null)))))
+    }
+  }
+
+  test("Insert schema evolution: source missing field in struct nested in array by position") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("a", ArrayType(StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType)))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"a array<struct<c1:int,c2:string,c3:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Seq(Row(1, "a", true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("a", ArrayType(StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType)))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Seq(Row(10, "b"))))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Seq(Row(1, "a", true))), Row(1, Seq(Row(10, "b", null)))))
+    }
+  }
+
+  test("Insert schema evolution: source missing deeply nested struct field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StructType(Seq(
+            StructField("a", IntegerType),
+            StructField("b", BooleanType)))))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"s struct<c1:int,c2:struct<a:int,b:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, Row(10, true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StructType(Seq(
+            StructField("a", IntegerType)))))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(20, Row(30))))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, Row(10, true))), Row(1, Row(20, Row(30, null)))))
+    }
+  }
+
+  test("Insert schema evolution: source with null struct and missing nested field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", IntegerType))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"s struct<c1:int,c2:string,c3:int>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", 10)))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, null))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, "a", 10)), Row(1, null)))
+    }
+  }
+
+  test("Insert schema evolution: source with null struct and missing nested field by position") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", IntegerType))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"s struct<c1:int,c2:string,c3:int>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", 10)))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, null))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, "a", 10)), Row(1, null)))
+    }
+  }
+
+  test("Insert schema evolution: mixed null and non-null structs with missing field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")), Row(2, null))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, "a", true)), Row(1, Row(10, "b", null)), Row(2, null)))
+    }
+  }
+
+  test("Insert schema evolution: null deeply nested struct with missing field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StructType(Seq(
+            StructField("a", IntegerType),
+            StructField("b", BooleanType)))))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"s struct<c1:int,c2:struct<a:int,b:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, Row(10, true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StructType(Seq(
+            StructField("a", IntegerType)))))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(20, null)))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Row(1, Row(10, true))), Row(1, Row(20, null))))
+    }
+  }
+
+  test("Insert schema evolution: null struct in array with missing field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("a", ArrayType(StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", BooleanType)))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"a array<struct<c1:int,c2:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Seq(Row(1, true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("a", ArrayType(StructType(Seq(
+          StructField("c1", IntegerType)))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Seq(Row(10), null)))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(Row(0, Seq(Row(1, true))), Row(1, Seq(Row(10, null), null))))
+    }
+  }
+
+  test("Insert schema evolution: source missing field in struct nested in map value by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("m", MapType(StringType, StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", BooleanType)))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"m map<string, struct<c1:int,c2:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Map("x" -> Row(1, true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("m", MapType(StringType, StructType(Seq(
+          StructField("c1", IntegerType)))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Map("y" -> Row(10))))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(
+          Row(0, Map("x" -> Row(1, true))),
+          Row(1, Map("y" -> Row(10, null)))))
+    }
+  }
+
+  test("Insert schema evolution: source missing field in struct nested in map value by position") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("m", MapType(StringType, StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", BooleanType)))))))
+      sql(s"CREATE TABLE $t1 (id int, " +
+        s"m map<string, struct<c1:int,c2:boolean>>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Map("x" -> Row(1, true))))),
+        targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("m", MapType(StringType, StructType(Seq(
+          StructField("c1", IntegerType)))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Map("y" -> Row(10))))),
+        sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData)
+      }
+      checkAnswer(
+        sql(s"SELECT * FROM $t1"),
+        Seq(
+          Row(0, Map("x" -> Row(1, true))),
+          Row(1, Map("y" -> Row(10, null)))))
+    }
+  }
+
+  test("Insert schema evolution: extra and missing top-level column by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string) USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      // Source has "active" (extra) but is missing "salary". Column count is the same (3)
+      // but names differ; by-name resolution should add "active" via schema evolution
+      // and fill "salary" with null.
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1,
+          Seq((1, "engineering", true)).toDF("id", "dep", "active"),
+          byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT id, salary, dep, active FROM $t1"),
+        Seq(Row(0, 100, "sales", null), Row(1, null, "engineering", true)))
+    }
+  }
+
+  test("Insert schema evolution: extra and missing nested struct field by name") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      // Source struct has "c1", "c2", "c4" (extra) but is missing "c3". Field count is the same
+      // (3) but names differ; by-name resolution should add "c4" via schema evolution and fill
+      // "c3" with null.
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c4", DoubleType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b", 3.14)))), sourceSchema)
+      withInsertNestedTypeCoercion {
+        doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+      }
+      checkAnswer(
+        sql(s"SELECT id, s.c1, s.c2, s.c3, s.c4 FROM $t1"),
+        Seq(Row(0, 1, "a", true, null), Row(1, 10, "b", null, 3.14)))
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Negative tests: missing columns/fields should fail WITHOUT schema evolution
+  // ---------------------------------------------------------------------------
+
+  test("Insert without evolution: source missing top-level column by name fails") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string) USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      // Without explicit DEFAULT on `salary`, missing by-name data only errors when null-fill
+      // for missing defaults is disabled; otherwise FILL mode inserts null for `salary`.
+      withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") {
+        checkError(
+          exception = intercept[AnalysisException] {
+            doInsertByName(t1, Seq((1, "engineering")).toDF("id", "dep"))
+          },
+          condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+          parameters = Map(
+            "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+            "colName" -> "`salary`")
+        )
+      }
+    }
+  }
+
+  test("Insert schema evolution: source missing top-level column by position fails " +
+      "when null default disabled and column has no explicit DEFAULT") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string) USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") {
+        withInsertNestedTypeCoercion {
+          checkError(
+            exception = intercept[AnalysisException] {
+              doInsertWithSchemaEvolution(t1,
+                Seq((1, 200)).toDF("id", "salary"))
+            },
+            condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+            parameters = Map(
+              "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+              "colName" -> "`dep`")
+          )
+        }
+      }
+    }
+  }
+
+  test("Insert schema evolution: source missing nested struct field by position fails " +
+      "when null default disabled") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")))), sourceSchema)
+      withSQLConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES.key -> "false") {
+        withInsertNestedTypeCoercion {
+          checkError(
+            exception = intercept[AnalysisException] {
+              doInsertWithSchemaEvolution(t1, sourceData)
+            },
+            condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+            parameters = Map(
+              "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+              "colName" -> "`s`.`c3`")
+          )
+        }
+      }
+    }
+  }
+
+  test("Insert without evolution: source missing top-level column by position fails") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, salary int, dep string) USING $v2Format")
+      doInsert(t1, Seq((0, 100, "sales")).toDF("id", "salary", "dep"))
+      checkError(
+        exception = intercept[AnalysisException] {
+          doInsert(t1, Seq((1, 200)).toDF("id", "salary"))
+        },
+        condition = "INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS",
+        parameters = Map(
+          "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+          "tableColumns" -> "`id`, `salary`, `dep`",
+          "dataColumns" -> "`id`, `salary`")
+      )
+    }
+  }
+
+  test("Insert without evolution: source missing nested struct field by name fails") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")))), sourceSchema)
+      checkError(
+        exception = intercept[AnalysisException] {
+          doInsertByName(t1, sourceData)
+        },
+        condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+        parameters = Map(
+          "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+          "colName" -> "`s`.`c3`")
+      )
+    }
+  }
+
+  test("Insert with evolution but without coercion flag:" +
+      " source missing nested struct field by name fails") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")))), sourceSchema)
+      checkError(
+        exception = intercept[AnalysisException] {
+          doInsertWithSchemaEvolution(t1, sourceData, byName = true)
+        },
+        condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+        parameters = Map(
+          "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+          "colName" -> "`s`.`c3`")
+      )
+    }
+  }
+
+  test("Insert without evolution: source missing nested struct field by position fails") {
+    val t1 = s"${catalogAndNamespace}tbl"
+    withTable(t1) {
+      val targetSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType),
+          StructField("c3", BooleanType))))))
+      sql(s"CREATE TABLE $t1 (id int, s struct<c1:int,c2:string,c3:boolean>) USING $v2Format")
+      val targetData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(0, Row(1, "a", true)))), targetSchema)
+      doInsert(t1, targetData)
+
+      val sourceSchema = StructType(Seq(
+        StructField("id", IntegerType),
+        StructField("s", StructType(Seq(
+          StructField("c1", IntegerType),
+          StructField("c2", StringType))))))
+      val sourceData = spark.createDataFrame(
+        spark.sparkContext.parallelize(Seq(Row(1, Row(10, "b")))), sourceSchema)
+      checkError(
+        exception = intercept[AnalysisException] {
+          doInsert(t1, sourceData)
+        },
+        condition = "INCOMPATIBLE_DATA_FOR_TABLE.STRUCT_MISSING_FIELDS",
+        parameters = Map(
+          "tableName" -> toSQLId(s"${catalogAndNamespace}tbl"),
+          "colName" -> "`s`",
+          "missingFields" -> "`c3`")
+      )
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
index 4a406322a5a19..38de6b043bc2b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.connector.expressions.Expressions._
 import org.apache.spark.sql.execution.{ExtendedMode, FormattedMode, RDDScanExec, SimpleMode, SortExec, SparkPlan}
 import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, GroupPartitionsExec}
 import org.apache.spark.sql.execution.exchange.{ShuffleExchangeExec, ShuffleExchangeLike}
-import org.apache.spark.sql.execution.joins.SortMergeJoinExec
+import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec}
 import org.apache.spark.sql.functions.{col, max}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf._
@@ -3913,4 +3913,79 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase with
       }
     }
   }
+
+  test("SPARK-56549: k-way merge enabled only when parent requires ordering") {
+    // Both tables are partitioned by id/item_id and report a two-column ordering.
+    // Key 1 appears on two splits on each side, so GroupPartitionsExec must coalesce.
+    //
+    // Dynamic gate: with the config enabled, k-way merge must be activated only when the parent
+    // actually requires ordering (SMJ), and must stay off when the parent does not (hash join).
+    val itemOrdering = Array(
+      sort(FieldReference("id"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST),
+      sort(FieldReference("arrive_time"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST))
+    createTable(items, itemsColumns, Array(identity("id")), itemOrdering)
+    sql(s"INSERT INTO testcat.ns.$items VALUES " +
+      "(2, 'cc', 30.0, cast('2023-06-15' as timestamp)), " +
+      "(1, 'bb', 20.0, cast('2022-03-10' as timestamp)), " +
+      "(3, 'dd', 40.0, cast('2024-01-01' as timestamp)), " +
+      "(1, 'aa', 10.0, cast('2021-05-20' as timestamp)), " +
+      "(2, 'ee', 50.0, cast('2025-09-01' as timestamp))")
+
+    val purchaseOrdering = Array(
+      sort(FieldReference("item_id"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST),
+      sort(FieldReference("time"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST))
+    createTable(purchases, purchasesColumns, Array(identity("item_id")), purchaseOrdering)
+    sql(s"INSERT INTO testcat.ns.$purchases VALUES " +
+      "(2, 50.0, cast('2025-09-01' as timestamp)), " +
+      "(1, 10.0, cast('2021-05-20' as timestamp)), " +
+      "(3, 40.0, cast('2024-01-01' as timestamp)), " +
+      "(2, 30.0, cast('2023-06-15' as timestamp)), " +
+      "(1, 20.0, cast('2022-03-10' as timestamp))")
+
+    withSQLConf(
+        SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false",
+        SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true"
+    ) {
+      val hashDf = sql(
+        s"""
+           |SELECT /*+ SHUFFLE_HASH(i, p) */ i.id, i.name
+           |FROM testcat.ns.$items i
+           |JOIN testcat.ns.$purchases p ON p.item_id = i.id AND p.time = i.arrive_time
+           |""".stripMargin)
+      checkAnswer(hashDf, Seq(Row(1, "aa"), Row(1, "bb"), Row(2, "cc"), Row(2, "ee"), Row(3, "dd")))
+      val hashPlan = hashDf.queryExecution.executedPlan
+      assert(collect(hashPlan) { case j: ShuffledHashJoinExec => j }.nonEmpty,
+        "expected ShuffledHashJoinExec")
+      assert(collectAllShuffles(hashPlan).isEmpty, "should not shuffle for compatible partitioning")
+      val hashCoalescing =
+        collectAllGroupPartitions(hashPlan).filter(_.groupedPartitions.exists(_._2.size > 1))
+      assert(hashCoalescing.nonEmpty, "expected coalescing GroupPartitionsExec")
+      hashCoalescing.foreach { gp =>
+        assert(!gp.enableSortedMerge,
+          "hash join does not require ordering: enableSortedMerge must stay false")
+        assert(!gp.execute().isInstanceOf[SortedMergeCoalescedRDD[_]],
+          "hash join does not require ordering: must use simple CoalescedRDD")
+      }
+
+      val smjDf = sql(
+        s"""
+           |${selectWithMergeJoinHint("i", "p")}
+           |i.id, i.name
+           |FROM testcat.ns.$items i
+           |JOIN testcat.ns.$purchases p ON p.item_id = i.id AND p.time = i.arrive_time
+           |""".stripMargin)
+      checkAnswer(smjDf, Seq(Row(1, "aa"), Row(1, "bb"), Row(2, "cc"), Row(2, "ee"), Row(3, "dd")))
+      val smjPlan = smjDf.queryExecution.executedPlan
+      assert(collectAllShuffles(smjPlan).isEmpty, "should not shuffle for compatible partitioning")
+      val smjCoalescing =
+        collectAllGroupPartitions(smjPlan).filter(_.groupedPartitions.exists(_._2.size > 1))
+      assert(smjCoalescing.nonEmpty, "expected coalescing GroupPartitionsExec")
+      smjCoalescing.foreach { gp =>
+        assert(gp.enableSortedMerge,
+          "sort-merge join requires ordering: enableSortedMerge must be true")
+        assert(gp.execute().isInstanceOf[SortedMergeCoalescedRDD[_]],
+          "sort-merge join requires ordering: must use SortedMergeCoalescedRDD")
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala
index e1c574ec7ba65..cb59ce80328d8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala
@@ -17,10 +17,10 @@
 
 package org.apache.spark.sql.connector
 
+import org.apache.spark.sql.{sources, Column, Row}
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.Row
 import org.apache.spark.sql.classic.MergeIntoWriter
-import org.apache.spark.sql.connector.catalog.Column
+import org.apache.spark.sql.connector.catalog.{Aborted, Committed, InMemoryBaseTable, InMemoryRowLevelOperationTableCatalog}
 import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.connector.catalog.TableInfo
 import org.apache.spark.sql.functions._
@@ -31,6 +31,164 @@ class MergeIntoDataFrameSuite extends RowLevelOperationSuiteBase {
 
   import testImplicits._
 
+  private def targetTableCol(colName: String): Column = {
+    col(tableNameAsString + "." + colName)
+  }
+
+  test("self merge with transactional checks") {
+    // create table
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // create a source on top of itself that will be fully resolved and analyzed
+    val sourceDF = spark.table(tableNameAsString)
+      .where("salary == 100")
+      .as("source")
+    sourceDF.queryExecution.assertAnalyzed()
+
+    // merge into table using the source on top of itself
+    val (txn, txnTables) = executeTransaction {
+      sourceDF
+        .mergeInto(
+          tableNameAsString,
+          $"source.pk" === targetTableCol("pk") && targetTableCol("dep") === "hr")
+        .whenMatched()
+        .update(Map("salary" -> targetTableCol("salary").plus(1)))
+        .whenNotMatched()
+        .insertAll()
+        .merge()
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 1)
+    assert(table.version() == "2")
+
+    // check all table scans
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size == 4)
+
+    // check table scans as MERGE target
+    val numTargetScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    }
+    assert(numTargetScans == 2)
+
+    // check table scans as MERGE source
+    val numSourceScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("salary", 100) => true
+      case _ => false
+    }
+    assert(numSourceScans == 2)
+
+    // check txn state was propagated correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 101, "hr"), // update
+        Row(2, 200, "software"), // unchanged
+        Row(3, 300, "hr"))) // unchanged
+  }
+
+  for (alterClause <- Seq(
+         "ADD COLUMN new_col INT",
+         "DROP COLUMN salary",
+         "ALTER COLUMN salary TYPE BIGINT",
+         "ALTER COLUMN pk DROP NOT NULL"))
+  test(s"self merge fails when source schema changes after analysis - DDL: $alterClause" ) {
+    withTable(tableNameAsString) {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |""".stripMargin)
+
+      val sourceDF = spark.table(tableNameAsString).where("salary == 100").as("source")
+      sourceDF.queryExecution.assertAnalyzed()
+
+      sql(s"ALTER TABLE $tableNameAsString $alterClause")
+
+      val e = intercept[AnalysisException] {
+        sourceDF
+          .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+          .whenMatched()
+          .update(Map("salary" -> targetTableCol("salary").plus(1)))
+          .merge()
+      }
+
+      assert(
+        e.getCondition == "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+        alterClause)
+      assert(catalog.lastTransaction.currentState == Aborted, alterClause)
+      assert(catalog.lastTransaction.isClosed, alterClause)
+    }
+  }
+
+  test("self merge fails when source table is dropped and recreated after analysis") {
+    withTable(tableNameAsString) {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |""".stripMargin)
+
+      val sourceDF = spark.table(tableNameAsString).where("salary == 100").as("source")
+      sourceDF.queryExecution.assertAnalyzed()
+
+      val originalId = catalog.loadTable(ident).id
+
+      sql(s"DROP TABLE $tableNameAsString")
+      sql(s"CREATE TABLE $tableNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+      val newId = catalog.loadTable(ident).id
+      assert(originalId != newId)
+
+      val e = intercept[AnalysisException] {
+        sourceDF
+          .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+          .whenMatched()
+          .update(Map("salary" -> targetTableCol("salary").plus(1)))
+          .merge()
+      }
+
+      assert(e.getCondition == "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.TABLE_ID_MISMATCH")
+      assert(catalog.lastTransaction.currentState == Aborted)
+      assert(catalog.lastTransaction.isClosed)
+    }
+  }
+
+  test("self merge fails when column is dropped and re-added after analysis") {
+    withTable(tableNameAsString) {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |""".stripMargin)
+
+      val sourceDF = spark.table(tableNameAsString).where("salary == 100").as("source")
+      sourceDF.queryExecution.assertAnalyzed()
+
+      sql(s"ALTER TABLE $tableNameAsString DROP COLUMN salary")
+      sql(s"ALTER TABLE $tableNameAsString ADD COLUMN salary INT")
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sourceDF
+            .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+            .whenMatched()
+            .update(Map("salary" -> targetTableCol("salary").plus(1)))
+            .merge()
+        },
+        condition = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMN_ID_MISMATCH",
+        matchPVals = true,
+        parameters = Map("tableName" -> ".*", "errors" -> ".*"))
+
+      assert(catalog.lastTransaction.currentState == Aborted)
+      assert(catalog.lastTransaction.isClosed)
+    }
+  }
+
   test("merge into empty table with NOT MATCHED clause") {
     withTempView("source") {
       createTable("pk INT NOT NULL, salary INT, dep STRING")
@@ -979,6 +1137,7 @@ class MergeIntoDataFrameSuite extends RowLevelOperationSuiteBase {
   }
 
   test("SPARK-54157: version is refreshed when source is V2 table") {
+    import org.apache.spark.sql.connector.catalog.Column
     val sourceTable = "cat.ns1.source_table"
     withTable(sourceTable) {
       createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
@@ -1026,6 +1185,7 @@ class MergeIntoDataFrameSuite extends RowLevelOperationSuiteBase {
   }
 
   test("SPARK-54444: any schema changes after analysis are prohibited") {
+    import org.apache.spark.sql.connector.catalog.Column
     val sourceTable = "cat.ns1.source_table"
     withTable(sourceTable) {
       createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
@@ -1073,4 +1233,318 @@ class MergeIntoDataFrameSuite extends RowLevelOperationSuiteBase {
       assert(e.message.contains("incompatible changes to table `cat`.`ns1`.`source_table`"))
     }
   }
+
+  // Cache-substitution tests for the txn path: connector approves stale-free cached scans via
+  // Transaction.registerScans.
+  test("cached merge source is reused when the table is unchanged since caching") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    val tableVersionBeforeCache = table.version()
+
+    val sourceDF = spark.table(tableNameAsString).where("salary < 250").as("source")
+    sourceDF.cache()
+    sourceDF.count()
+
+    assert(table.version() == tableVersionBeforeCache, "sanity: caching does not bump version")
+
+    val (txn, txnTables) = executeTransaction {
+      sourceDF
+        .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+        .whenMatched()
+        .update(Map("salary" -> targetTableCol("salary").plus(1)))
+        .merge()
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 1)
+    assert(table.version() == "2")
+    assert(txn.registeredScans.nonEmpty, "registerScans should have accepted the cached scan")
+
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size == 4)
+    val sourceFilterScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.LessThan("salary", 250) => true
+      case _ => false
+    }
+    assert(sourceFilterScans == 2,
+      s"expected two salary < 250 scan events, got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+    val targetScans = targetTxnTable.scanEvents.count(_.isEmpty)
+    assert(targetScans == 2,
+      s"expected two target-side scans with no pushed filters, got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 101, "hr"),
+        Row(2, 201, "software"),
+        Row(3, 300, "hr")))
+  }
+
+  test("cached merge source is dropped when the table version moves on between caching and MERGE") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    val sourceDF = spark.table(tableNameAsString).where("salary < 250").as("source")
+    sourceDF.cache()
+    sourceDF.count()
+    val versionAtCache = table.version()
+
+    // Bump the version directly to simulate an out-of-band committer. A Spark-side write would
+    // also bump the version but would trigger CacheManager.refreshCache and defeat the test.
+    table.increaseVersion()
+    assert(table.version() != versionAtCache, "sanity: bump should change the version")
+
+    val (txn, txnTables) = executeTransaction {
+      sourceDF
+        .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+        .whenMatched()
+        .update(Map("salary" -> targetTableCol("salary").plus(1)))
+        .merge()
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 1)
+    assert(table.version() == "3")
+    assert(txn.registeredScans.isEmpty, "registerScans should refuse the stale cached scan")
+
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size == 4)
+    val sourceFilterScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.LessThan("salary", 250) => true
+      case _ => false
+    }
+    assert(sourceFilterScans == 2,
+      s"expected two salary<250 scan events (fresh, since cache was bypassed), got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+    val targetScans = targetTxnTable.scanEvents.count(_.isEmpty)
+    assert(targetScans == 2,
+      s"expected two target-side scans with no pushed filters, got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 101, "hr"),
+        Row(2, 201, "software"),
+        Row(3, 300, "hr")))
+  }
+
+  test("cached source from outside the txn catalog is reused without consulting registerScans") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    // Source has no DataSourceV2Relation, so validateCachedEntryForTransaction's txnTables set
+    // is empty and the cache is accepted without consulting registerScans.
+    val sourceDF = spark.range(2)
+      .select(
+        (col("id") + 1).cast(IntegerType).as("pk"),
+        lit(999).cast(IntegerType).as("salary"),
+        lit("hr").cast(StringType).as("dep"))
+      .as("source")
+    sourceDF.cache()
+    sourceDF.count()
+
+    val (txn, txnTables) = executeTransaction {
+      sourceDF
+        .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+        .whenMatched()
+        .update(Map("salary" -> $"source.salary"))
+        .merge()
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 1)
+    assert(table.version() == "2")
+    assert(txn.registeredScans.isEmpty,
+      "registerScans should not be consulted when the cached subtree has no txn-catalog reads")
+
+    val targetTxnTable = txnTables(tableNameAsString)
+    val targetScans = targetTxnTable.scanEvents.count(_.isEmpty)
+    assert(targetScans == 2,
+      s"expected two target-side scans, got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 999, "hr"),
+        Row(2, 999, "software")))
+  }
+
+  test("cached relation inside an IN-subquery is substituted via useCachedData recursion") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // The cached entry sits inside an IN-subquery, so substitution requires useCachedData's
+    // recursion into SubqueryExpression - transformDown alone wouldn't reach it.
+    val lookupName = "cat.ns1.cache_lookup"
+    withTable(lookupName) {
+      sql(s"CREATE TABLE $lookupName (pk INT) USING foo")
+      sql(s"INSERT INTO $lookupName VALUES (1), (2)")
+
+      val lookupDF = spark.table(lookupName)
+      lookupDF.cache()
+      lookupDF.count()
+      lookupDF.createOrReplaceTempView("cache_lookup_view")
+
+      val sourceDF = spark.table(tableNameAsString)
+        .where("pk IN (SELECT pk FROM cache_lookup_view)")
+        .as("source")
+
+      val (txn, txnTables) = executeTransaction {
+        sourceDF
+          .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+          .whenMatched()
+          .update(Map("salary" -> targetTableCol("salary").plus(1)))
+          .merge()
+      }
+
+      assert(txn.currentState == Committed)
+      assert(txn.isClosed)
+      // The helper `txnTables` is built from physical BatchScanExec nodes in the executed plan;
+      // since cache_lookup is cache-served, it has no BatchScanExec and doesn't appear here.
+      assert(txnTables.size == 1)
+      // Both the target (test_table) and the IN-subquery's lookup (cache_lookup) must be
+      // tracked through the txn catalog via TxnTableCatalog.loadTable. This is the diagnostic
+      // that surfaces UnresolveRelationsInTransaction subquery-walking gaps.
+      assert(txn.catalog.txnTables.size == 2)
+      assert(table.version() == "2")
+      // Every registered scan must point at the lookup (the only cached subtree in this plan).
+      val lookupIdent = Identifier.of(Array("ns1"), "cache_lookup")
+      val lookupTable = catalog.loadTable(lookupIdent)
+      assert(txn.registeredScans.flatten.collect {
+        case s: InMemoryBaseTable#InMemoryBatchScan => s.table
+      }.distinct == Seq(lookupTable))
+      val lookupTxnTable = txn.catalog.txnTables(lookupIdent)
+      assert(lookupTxnTable.scanEvents.nonEmpty,
+        "lookup scan events should be recorded via registerScans")
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, 101, "hr"),
+          Row(2, 201, "software"),
+          Row(3, 300, "hr")))
+    }
+  }
+
+  test("connector rejecting registerScans causes cache bypass") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    val sourceDF = spark.table(tableNameAsString).where("salary < 250").as("source")
+    sourceDF.cache()
+    sourceDF.count()
+
+    // Force the connector to reject `registerScans` (return false) so Spark must bypass
+    // the cache.
+    catalog.nextTxnRejectRegisteredScansAttempt = true
+
+    val (txn, txnTables) = executeTransaction {
+      sourceDF
+        .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+        .whenMatched()
+        .update(Map("salary" -> targetTableCol("salary").plus(1)))
+        .merge()
+    }
+
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 1)
+    assert(table.version() == "2")
+    assert(txn.registeredScans.isEmpty, "rejected registerScans attempt")
+
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size == 4)
+    val sourceFilterScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.LessThan("salary", 250) => true
+      case _ => false
+    }
+    assert(sourceFilterScans == 2,
+      s"expected two salary < 250 scan events (fresh, since cache was bypassed), got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+    val targetScans = targetTxnTable.scanEvents.count(_.isEmpty)
+    assert(targetScans == 2,
+      s"expected two target-side scans, got " +
+        s"${targetTxnTable.scanEvents.map(_.toSeq).mkString("[", ", ", "]")}")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 101, "hr"),
+        Row(2, 201, "software"),
+        Row(3, 300, "hr")))
+  }
+
+  test("registerScans receives only scans for the transaction's catalog") {
+    withSQLConf("spark.sql.catalog.cat2" ->
+        classOf[InMemoryRowLevelOperationTableCatalog].getName) {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |""".stripMargin)
+
+      withTable("cat2.ns.lookup") {
+        sql("CREATE TABLE cat2.ns.lookup (pk INT) USING foo")
+        sql("INSERT INTO cat2.ns.lookup VALUES (1), (2)")
+
+        // Cache a subtree that joins a `cat` table (the txn catalog) with a `cat2` table.
+        val sourceDF = spark.table(tableNameAsString)
+          .join(spark.table("cat2.ns.lookup"), "pk")
+          .as("source")
+        sourceDF.cache()
+        sourceDF.count()
+
+        val (txn, txnTables) = executeTransaction {
+          sourceDF
+            .mergeInto(tableNameAsString, $"source.pk" === targetTableCol("pk"))
+            .whenMatched()
+            .update(Map("salary" -> targetTableCol("salary").plus(1)))
+            .merge()
+        }
+
+        assert(txn.currentState == Committed)
+        assert(txn.isClosed)
+        // Only the cat-side target is tracked in the txn's catalog; cat2.lookup goes through
+        // a different (non-transactional) catalog and must not appear here.
+        assert(txnTables.size == 1)
+        assert(txn.catalog.txnTables.size == 1)
+        assert(table.version() == "2")
+        val passed = txn.registeredScans.flatten.collect {
+          case s: InMemoryBaseTable#InMemoryBatchScan => s
+        }
+        assert(passed.nonEmpty, "registerScans should have been consulted")
+        // The filter should pass only the cat-side scan; the cat2 scan must be filtered out.
+        val catTable = catalog.loadTable(ident)
+        assert(passed.forall(_.table.id() == catTable.id()),
+          "only the txn-catalog's scan should reach registerScans")
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(
+            Row(1, 101, "hr"),
+            Row(2, 201, "software")))
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
index 4118e57216145..bb221a200b2d1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
@@ -1265,4 +1265,42 @@ trait MergeIntoSchemaEvolutionBasicTests extends MergeIntoSchemaEvolutionSuiteBa
     expected = Seq((1, "hr")).toDF("pk", "dep"),
     expectedWithoutEvolution = Seq((1, "hr")).toDF("pk", "dep")
   )
+
+  for (colName <- Seq("job.title", "job title")) {
+    testEvolution(s"SPARK-56462: source has extra column with special-char name: $colName")(
+      targetData = Seq(
+        (1, 100, "hr"),
+        (2, 200, "software"),
+        (3, 300, "hr")
+      ).toDF("pk", "salary", "dep"),
+      sourceData = Seq(
+        (2, 150, "finance", "engineer"),
+        (4, 400, "finance", "manager")
+      ).toDF("pk", "salary", "dep", colName),
+      clauses = Seq(updateAll(), insertAll()),
+      expected = Seq[(Int, Int, String, String)](
+        (1, 100, "hr", null),
+        (2, 150, "finance", "engineer"),
+        (3, 300, "hr", null),
+        (4, 400, "finance", "manager")
+      ).toDF("pk", "salary", "dep", colName),
+      expectedWithoutEvolution = Seq(
+        (1, 100, "hr"),
+        (2, 150, "finance"),
+        (3, 300, "hr"),
+        (4, 400, "finance")
+      ).toDF("pk", "salary", "dep"),
+      expectedSchema = StructType(Seq(
+        StructField("pk", IntegerType, nullable = false),
+        StructField("salary", IntegerType, nullable = false),
+        StructField("dep", StringType),
+        StructField(colName, StringType)
+      )),
+      expectedSchemaWithoutEvolution = StructType(Seq(
+        StructField("pk", IntegerType, nullable = false),
+        StructField("salary", IntegerType, nullable = false),
+        StructField("dep", StringType)
+      ))
+    )
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala
index 7c0e503705c7e..126b84b507caf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala
@@ -18,10 +18,11 @@
 package org.apache.spark.sql.connector
 
 import org.apache.spark.SparkRuntimeException
+import org.apache.spark.internal.config
 import org.apache.spark.sql.{AnalysisException, Row}
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, In, Not}
 import org.apache.spark.sql.catalyst.optimizer.BuildLeft
-import org.apache.spark.sql.connector.catalog.{Column, ColumnDefaultValue, InMemoryTable, TableInfo}
+import org.apache.spark.sql.connector.catalog.{Aborted, Column, ColumnDefaultValue, Committed, InMemoryTable, TableInfo}
 import org.apache.spark.sql.connector.expressions.{GeneralScalarExpression, LiteralValue}
 import org.apache.spark.sql.connector.write.MergeSummary
 import org.apache.spark.sql.execution.SparkPlan
@@ -29,6 +30,7 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.datasources.v2.MergeRowsExec
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, CartesianProductExec}
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.sources
 import org.apache.spark.sql.types.{IntegerType, StringType}
 
 abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
@@ -38,6 +40,298 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
 
   protected def deltaMerge: Boolean = false
 
+  test("self merge with transactional checks") {
+    // create table
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // merge into table using a subquery on top of itself
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""MERGE INTO $tableNameAsString t
+           |USING (SELECT * FROM $tableNameAsString WHERE salary = 100) s
+           |ON t.pk = s.pk AND t.dep = 'hr'
+           |WHEN MATCHED THEN
+           | UPDATE SET salary = t.salary + 1
+           |WHEN NOT MATCHED THEN
+           | INSERT *
+           |""".stripMargin)
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 1)
+    assert(table.version() == "2")
+
+    // check all table scans
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.size == 4)
+
+    // check table scans as MERGE target
+    val numTargetScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    }
+    assert(numTargetScans == 2)
+
+    // check table scans as MERGE source
+    val numSourceScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("salary", 100) => true
+      case _ => false
+    }
+    assert(numSourceScans == 2)
+
+    // check txn state was propagated correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 101, "hr"), // update
+        Row(2, 200, "software"), // unchanged
+        Row(3, 300, "hr"))) // unchanged
+  }
+
+  test("merge into table with analysis failure and transactional checks") {
+    createAndInitTable(
+      "pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'support'), (4, 400, 'finance')")
+
+    val exception = intercept[AnalysisException] {
+      sql(
+        s"""MERGE INTO $tableNameAsString t
+           |USING $sourceNameAsString s
+           |ON t.pk = s.pk
+           |WHEN MATCHED THEN
+           | UPDATE SET salary = s.invalid_column
+           |WHEN NOT MATCHED THEN
+           | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'pending')
+           |""".stripMargin)
+    }
+
+    assert(exception.getMessage.contains("invalid_column"))
+    assert(catalog.lastTransaction.currentState == Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("merge into table using view with transactional checks") {
+    withView("temp_view") {
+      // create target table
+      createAndInitTable(
+        "pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |""".stripMargin)
+
+      // create source table
+      sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT)")
+      sql(s"INSERT INTO $sourceNameAsString (pk, salary) VALUES (1, 150), (4, 400)")
+
+      // create view on top of source and target tables
+      sql(
+        s"""CREATE VIEW temp_view AS
+           |SELECT s.pk, s.salary, t.dep
+           |FROM $sourceNameAsString s
+           |LEFT JOIN (
+           | SELECT * FROM $tableNameAsString WHERE pk < 10
+           |) t ON s.pk = t.pk
+           |""".stripMargin)
+
+      // merge into target table using view
+      val (txn, txnTables) = executeTransaction {
+        sql(s"""MERGE INTO $tableNameAsString t
+           |USING temp_view s
+           |ON t.pk = s.pk AND t.dep = 'hr'
+           |WHEN MATCHED THEN
+           | UPDATE SET salary = s.salary, dep = s.dep
+           |WHEN NOT MATCHED THEN
+           | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'pending')
+           |""".stripMargin)
+      }
+
+      // check txn covers both tables and was properly committed and closed
+      assert(txn.currentState == Committed)
+      assert(txn.isClosed)
+      assert(txnTables.size == 2)
+      assert(table.version() == "2")
+
+      // check target table was scanned correctly
+      val targetTxnTable = txnTables(tableNameAsString)
+      assert(targetTxnTable.scanEvents.size == 4)
+
+      // check target table scans as MERGE target (dep = 'hr')
+      val numMergeTargetScans = targetTxnTable.scanEvents.flatten.count {
+        case sources.EqualTo("dep", "hr") => true
+        case _ => false
+      }
+      assert(numMergeTargetScans == 2)
+
+      // check target table scans in view as MERGE source (pk < 10)
+      val numViewTargetScans = targetTxnTable.scanEvents.flatten.count {
+        case sources.LessThan("pk", 10L) => true
+        case _ => false
+      }
+      assert(numViewTargetScans == 2)
+
+      // check source table scans in view as MERGE source (no predicate)
+      val sourceTxnTable = txnTables(sourceNameAsString)
+      assert(sourceTxnTable.scanEvents.size == 2)
+
+      // check txn state was propagated correctly
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, 150, "hr"), // update
+          Row(2, 200, "software"), // unchanged
+          Row(3, 300, "hr"), // unchanged
+          Row(4, 400, "pending"))) // new
+    }
+  }
+
+  test("merge into table using nested view with transactional checks") {
+    withView("base_view", "nested_view") {
+      withTable(sourceNameAsString) {
+        // create target table
+        createAndInitTable(
+          "pk INT NOT NULL, salary INT, dep STRING",
+          """{ "pk": 1, "salary": 100, "dep": "hr" }
+            |{ "pk": 2, "salary": 200, "dep": "software" }
+            |{ "pk": 3, "salary": 300, "dep": "hr" }
+            |""".stripMargin)
+
+        // create source table
+        sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT)")
+        sql(s"INSERT INTO $sourceNameAsString (pk, salary) VALUES (1, 150), (4, 400)")
+
+        // create base view
+        sql(
+          s"""CREATE VIEW base_view AS
+             |SELECT s.pk, s.salary, t.dep
+             |FROM $sourceNameAsString s
+             |LEFT JOIN (
+             | SELECT * FROM $tableNameAsString WHERE pk < 10
+             |) t ON s.pk = t.pk
+             |""".stripMargin)
+
+        // create nested view on top of base view
+        sql(
+          s"""CREATE VIEW nested_view AS
+             |SELECT * FROM base_view
+             |""".stripMargin)
+
+        // merge into target table using nested view
+        val (txn, txnTables) = executeTransaction {
+          sql(
+            s"""MERGE INTO $tableNameAsString t
+               |USING nested_view s
+               |ON t.pk = s.pk AND t.dep = 'hr'
+               |WHEN MATCHED THEN
+               | UPDATE SET salary = s.salary, dep = s.dep
+               |WHEN NOT MATCHED THEN
+               | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'pending')
+               |""".stripMargin)
+        }
+
+        // check txn covers both tables and was properly committed and closed
+        assert(txn.currentState == Committed)
+        assert(txn.isClosed)
+        assert(txnTables.size == 2)
+        assert(table.version() == "2")
+
+        // check target table was scanned correctly
+        val targetTxnTable = txnTables(tableNameAsString)
+        assert(targetTxnTable.scanEvents.size == 4)
+
+        // check target table scans as MERGE target (dep = 'hr')
+        val numMergeTargetScans = targetTxnTable.scanEvents.flatten.count {
+          case sources.EqualTo("dep", "hr") => true
+          case _ => false
+        }
+        assert(numMergeTargetScans == 2)
+
+        // check target table scans in view as MERGE source (pk < 10)
+        val numViewTargetScans = targetTxnTable.scanEvents.flatten.count {
+          case sources.LessThan("pk", 10L) => true
+          case _ => false
+        }
+        assert(numViewTargetScans == 2)
+
+        // check source table scans in view as MERGE source (no predicate)
+        val sourceTxnTable = txnTables(sourceNameAsString)
+        assert(sourceTxnTable.scanEvents.size == 2)
+
+        // check txn state was propagated correctly
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(
+            Row(1, 150, "hr"), // update
+            Row(2, 200, "software"), // unchanged
+            Row(3, 300, "hr"), // unchanged
+            Row(4, 400, "pending"))) // new
+      }
+    }
+  }
+
+  test("merge into table rewritten as INSERT with transactional checks") {
+    withTable(sourceNameAsString) {
+      // create target table
+      createAndInitTable(
+        "pk INT, value STRING, dep STRING",
+        """{ "pk": 1, "value": "a", "dep": "hr" }
+          |{ "pk": 2, "value": "b", "dep": "finance" }
+          |""".stripMargin)
+
+      // create source table
+      sql(s"CREATE TABLE $sourceNameAsString (pk INT, value STRING, dep STRING)")
+      sql(s"INSERT INTO $sourceNameAsString VALUES (3, 'c', 'hr'), (4, 'd', 'software')")
+
+      // merge into target with only WHEN NOT MATCHED clauses (rewritten as insert)
+      val (txn, txnTables) = executeTransaction {
+        sql(
+          s"""MERGE INTO $tableNameAsString t
+             |USING $sourceNameAsString s
+             |ON t.pk = s.pk
+             |WHEN NOT MATCHED AND s.pk < 4 THEN
+             |  INSERT (pk, value, dep) VALUES (s.pk, concat(s.value, '_low'), s.dep)
+             |WHEN NOT MATCHED AND s.pk >= 4 THEN
+             |  INSERT (pk, value, dep) VALUES (s.pk, concat(s.value, '_high'), s.dep)
+             |""".stripMargin)
+      }
+
+      // check txn covers both tables and was properly committed and closed
+      assert(txn.currentState == Committed)
+      assert(txn.isClosed)
+      assert(txnTables.size == 2)
+      assert(table.version() == "2")
+
+      // check target table was scanned correctly
+      val targetTxnTable = txnTables(tableNameAsString)
+      assert(targetTxnTable.scanEvents.size == 1)
+
+      // check source table was scanned correctly
+      val sourceTxnTable = txnTables(sourceNameAsString)
+      assert(sourceTxnTable.scanEvents.size == 1)
+
+      // check txn state was propagated correctly
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, "a", "hr"), // unchanged
+          Row(2, "b", "finance"), // unchanged
+          Row(3, "c_low", "hr"), // inserted via first NOT MATCHED clause
+          Row(4, "d_high", "software"))) // inserted via second NOT MATCHED clause
+    }
+  }
+
   test("merge into table with expression-based default values") {
     val columns = Array(
       Column.create("pk", IntegerType),
@@ -191,6 +485,16 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
           Row(1, 100, "hr"), // insert
           Row(2, 200, "finance"), // insert
           Row(3, 300, "hr"))) // insert
+
+      val mergeSummary = getMergeSummary()
+      assert(mergeSummary.numTargetRowsInserted === 3L)
+      assert(mergeSummary.numTargetRowsCopied === 0L)
+      assert(mergeSummary.numTargetRowsUpdated === 0L)
+      assert(mergeSummary.numTargetRowsDeleted === 0L)
+      assert(mergeSummary.numTargetRowsMatchedUpdated === 0L)
+      assert(mergeSummary.numTargetRowsMatchedDeleted === 0L)
+      assert(mergeSummary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+      assert(mergeSummary.numTargetRowsNotMatchedBySourceDeleted === 0L)
     }
   }
 
@@ -217,6 +521,16 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
         Seq(
           Row(2, 200, "finance"), // insert
           Row(3, 300, "hr"))) // insert
+
+      val mergeSummary = getMergeSummary()
+      assert(mergeSummary.numTargetRowsInserted === 2L)
+      assert(mergeSummary.numTargetRowsCopied === 0L)
+      assert(mergeSummary.numTargetRowsUpdated === 0L)
+      assert(mergeSummary.numTargetRowsDeleted === 0L)
+      assert(mergeSummary.numTargetRowsMatchedUpdated === 0L)
+      assert(mergeSummary.numTargetRowsMatchedDeleted === 0L)
+      assert(mergeSummary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+      assert(mergeSummary.numTargetRowsNotMatchedBySourceDeleted === 0L)
     }
   }
 
@@ -246,6 +560,115 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
           Row(1, 100, "hr"), // insert
           Row(2, 200, "finance"), // insert
           Row(3, 300, "hr"))) // insert
+
+      val mergeSummary = getMergeSummary()
+      assert(mergeSummary.numTargetRowsInserted === 3L)
+      assert(mergeSummary.numTargetRowsCopied === 0L)
+      assert(mergeSummary.numTargetRowsUpdated === 0L)
+      assert(mergeSummary.numTargetRowsDeleted === 0L)
+      assert(mergeSummary.numTargetRowsMatchedUpdated === 0L)
+      assert(mergeSummary.numTargetRowsMatchedDeleted === 0L)
+      assert(mergeSummary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+      assert(mergeSummary.numTargetRowsNotMatchedBySourceDeleted === 0L)
+    }
+  }
+
+  test("merge with literal false ON condition") {
+    withTempView("source") {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "hardware" }
+          |""".stripMargin)
+
+      Seq((1, 100, "hr"), (3, 300, "finance"))
+        .toDF("pk", "salary", "dep").createOrReplaceTempView("source")
+
+      sql(
+        s"""MERGE INTO $tableNameAsString t
+           |USING source s
+           |ON false
+           |WHEN MATCHED THEN UPDATE SET t.salary = -1
+           |""".stripMargin)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(Row(1, 100, "hr"), Row(2, 200, "hardware")))
+
+      val summary = getMergeSummary()
+      assert(summary.numTargetRowsUpdated === 0L)
+      assert(summary.numTargetRowsDeleted === 0L)
+      assert(summary.numTargetRowsInserted === 0L)
+      assert(summary.numTargetRowsMatchedUpdated === 0L)
+      assert(summary.numTargetRowsMatchedDeleted === 0L)
+      assert(summary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+      assert(summary.numTargetRowsNotMatchedBySourceDeleted === 0L)
+      assert(summary.numTargetRowsCopied === 0L)
+    }
+  }
+
+  test("merge with literal true ON condition") {
+    withTempView("source") {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "hardware" }
+          |""".stripMargin)
+
+      Seq((99, 999, "finance"))
+        .toDF("pk", "salary", "dep").createOrReplaceTempView("source")
+
+      sql(
+        s"""MERGE INTO $tableNameAsString t
+           |USING source s
+           |ON true
+           |WHEN MATCHED THEN UPDATE SET t.salary = -1
+           |""".stripMargin)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(Row(1, -1, "hr"), Row(2, -1, "hardware")))
+
+      val summary = getMergeSummary()
+      assert(summary.numTargetRowsUpdated === 2L)
+      assert(summary.numTargetRowsDeleted === 0L)
+      assert(summary.numTargetRowsInserted === 0L)
+      assert(summary.numTargetRowsMatchedUpdated === 2L)
+      assert(summary.numTargetRowsMatchedDeleted === 0L)
+      assert(summary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+      assert(summary.numTargetRowsNotMatchedBySourceDeleted === 0L)
+      assert(summary.numTargetRowsCopied === 0L)
+    }
+  }
+
+  test("merge with statically empty source and only MATCHED clauses") {
+    withTempView("source") {
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "hardware" }
+          |""".stripMargin)
+
+      Seq.empty[(Int, Int, String)].toDF("pk", "salary", "dep")
+        .createOrReplaceTempView("source")
+
+      sql(
+        s"""MERGE INTO $tableNameAsString t
+           |USING source s
+           |ON t.pk = s.pk
+           |WHEN MATCHED THEN UPDATE SET t.salary = -1
+           |""".stripMargin)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(Row(1, 100, "hr"), Row(2, 200, "hardware")))
+
+      val summary = getMergeSummary()
+      assert(summary.numTargetRowsUpdated === 0L)
+      assert(summary.numTargetRowsDeleted === 0L)
+      assert(summary.numTargetRowsInserted === 0L)
+      assert(summary.numTargetRowsMatchedUpdated === 0L)
+      assert(summary.numTargetRowsMatchedDeleted === 0L)
+      assert(summary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+      assert(summary.numTargetRowsNotMatchedBySourceDeleted === 0L)
+      assert(summary.numTargetRowsCopied === 0L)
     }
   }
 
@@ -671,6 +1094,137 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
     }
   }
 
+  test("merge with CTE with transactional checks") {
+    withTable(sourceNameAsString) {
+      // create target table
+      createAndInitTable(
+        "pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |""".stripMargin)
+
+      // create source table
+      sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+      sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+      // merge into target table using CTE
+      val (txn, txnTables) = executeTransaction {
+        sql(
+          s"""WITH cte AS (
+             |  SELECT pk, salary + 50 AS salary, dep
+             |  FROM $sourceNameAsString
+             |  WHERE salary > 100
+             |)
+             |MERGE INTO $tableNameAsString t
+             |USING cte s
+             |ON t.pk = s.pk AND t.dep = 'hr'
+             |WHEN MATCHED THEN
+             | UPDATE SET salary = s.salary
+             |WHEN NOT MATCHED THEN
+             | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'pending')
+             |""".stripMargin)
+      }
+
+      // check txn covers both tables and was properly committed and closed
+      assert(txn.currentState == Committed)
+      assert(txn.isClosed)
+      assert(txnTables.size == 2)
+      assert(table.version() == "2")
+
+      // check target table was scanned correctly
+      val targetTxnTable = txnTables(tableNameAsString)
+      assert(targetTxnTable.scanEvents.size == 2)
+
+      // check target table scans as MERGE target (dep = 'hr')
+      val numMergeTargetScans = targetTxnTable.scanEvents.flatten.count {
+        case sources.EqualTo("dep", "hr") => true
+        case _ => false
+      }
+      assert(numMergeTargetScans == 2)
+
+      // check source table was scanned correctly
+      val sourceTxnTable = txnTables(sourceNameAsString)
+      assert(sourceTxnTable.scanEvents.size == 2)
+
+      // check source table scans in CTE (salary > 100)
+      val numCteSourceScans = sourceTxnTable.scanEvents.flatten.count {
+        case sources.GreaterThan("salary", 100) => true
+        case _ => false
+      }
+      assert(numCteSourceScans == 2)
+
+      // check txn state was propagated correctly
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, 200, "hr"), // updated (150 + 50)
+          Row(2, 200, "software"), // unchanged
+          Row(3, 300, "hr"), // unchanged
+          Row(4, 450, "pending"))) // inserted (400 + 50)
+    }
+  }
+
+  test("merge with cached source and transactional checks") {
+    withTable(sourceNameAsString) {
+      // create target table
+      createAndInitTable(
+        "pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |""".stripMargin)
+
+      // create and populate source table
+      sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+      sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'support'), (4, 400, 'finance')")
+
+      // Cache source table before the transaction. Make sure when the transation is active the
+      // catalog still creates a transaction table.
+      spark.table(sourceNameAsString).cache()
+
+      try {
+        val (txn, txnTables) = executeTransaction {
+          sql(
+            s"""MERGE INTO $tableNameAsString t
+               |USING $sourceNameAsString s
+               |ON t.pk = s.pk AND t.dep = 'hr'
+               |WHEN MATCHED THEN
+               | UPDATE SET salary = s.salary, dep = s.dep
+               |WHEN NOT MATCHED THEN
+               | INSERT (pk, salary, dep) VALUES (s.pk, s.salary, 'pending')
+               |""".stripMargin)
+        }
+
+        assert(txn.currentState == Committed)
+        assert(txn.isClosed)
+        // The source TxnTable is still created during analysis (the txn catalog routes the
+        // load), but cache substitution reuses the cached scan instead of issuing a fresh
+        // BatchScanExec for the source, so only the target appears in the executed plan.
+        assert(txn.catalog.txnTables.size == 2)
+        assert(txnTables.size == 1)
+        assert(txnTables.contains(tableNameAsString))
+        assert(table.version() == "2")
+        // The connector accepted the cached source scan via registerScans, which also
+        // records the scan as a read event against the source's TxnTable.
+        assert(txn.registeredScans.nonEmpty)
+        val sourceTxnTable = txn.catalog.txnTables.values.find(_.name == sourceNameAsString).get
+        assert(sourceTxnTable.scanEvents.nonEmpty)
+        assert(txnTables(tableNameAsString).scanEvents.nonEmpty)
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(
+            Row(1, 150, "support"), // matched and updated
+            Row(2, 200, "software"), // unchanged
+            Row(3, 300, "hr"), // unchanged (no match in source)
+            Row(4, 400, "pending"))) // not matched, inserted
+      } finally {
+        spark.catalog.uncacheTable(sourceNameAsString)
+      }
+    }
+  }
+
   test("merge with subquery as source") {
     withTempView("source") {
       createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
@@ -2148,6 +2702,58 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
     }
   }
 
+  test("metric values are stable across stage retries") {
+    // The join in the MERGE plan introduces a shuffle (with broadcast disabled), and the
+    // DAGScheduler corrupts the first attempt of every upstream shuffle map stage. Note:
+    // the current fetch-failure injection does not retry the MergeRowsExec/writer stage,
+    // so this test passes equally well with plain SQLMetric — it only exercises the
+    // SLAM-aware read path. Follow-up #55738 will add infra to actually retry the writer
+    // stage and exercise the SLAM behavior end-to-end for MERGE.
+    withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      withTempView("source") {
+        createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+          """{ "pk": 1, "salary": 100, "dep": "hr" }
+            |{ "pk": 2, "salary": 200, "dep": "software" }
+            |{ "pk": 3, "salary": 300, "dep": "hr" }
+            |""".stripMargin)
+
+        val sourceDF = Seq(1, 2, 10).toDF("pk")
+        sourceDF.createOrReplaceTempView("source")
+
+        withSparkContextConf(
+            config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> "true") {
+          sql(
+            s"""MERGE INTO $tableNameAsString t
+               |USING source s
+               |ON t.pk = s.pk
+               |WHEN MATCHED THEN
+               | UPDATE SET salary = salary + 100
+               |WHEN NOT MATCHED THEN
+               | INSERT (pk, salary, dep) VALUES (s.pk, 999, 'unknown')
+               |""".stripMargin)
+        }
+
+        val mergeSummary = getMergeSummary()
+        assert(mergeSummary.numTargetRowsUpdated === 2L)
+        assert(mergeSummary.numTargetRowsMatchedUpdated === 2L)
+        assert(mergeSummary.numTargetRowsInserted === 1L)
+        assert(mergeSummary.numTargetRowsCopied === (if (deltaMerge) 0L else 1L))
+        assert(mergeSummary.numTargetRowsDeleted === 0L)
+        assert(mergeSummary.numTargetRowsMatchedDeleted === 0L)
+        assert(mergeSummary.numTargetRowsNotMatchedBySourceUpdated === 0L)
+        assert(mergeSummary.numTargetRowsNotMatchedBySourceDeleted === 0L)
+
+        checkAnswer(
+          sql(s"SELECT pk, salary FROM $tableNameAsString ORDER BY pk"),
+          Seq(
+            Row(1, 200),
+            Row(2, 300),
+            Row(3, 300),
+            Row(10, 999)))
+      }
+    }
+  }
+
   test("SPARK-55074: imerge with type coercion from INT to STRING") {
     // INT -> STRING is allowed in ANSI mode, merge should succeed via type coercion
     // without requiring schema evolution
@@ -2223,6 +2829,8 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase
       sql(query)
     }
     assert(e.getMessage.contains("ON search condition of the MERGE statement"))
+    assert(catalog.lastTransaction.currentState == Aborted)
+    assert(catalog.lastTransaction.isClosed)
   }
 
   private def assertMetric(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
index fe338175ec888..77e3818aafe8e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala
@@ -376,6 +376,32 @@ class MetadataColumnSuite extends DatasourceV2SQLBase {
     }
   }
 
+  test("SPARK-56132: streaming read of metadata columns from V2 source") {
+    withTable(tbl) {
+      prepareTable()
+      withTempDir { checkpointDir =>
+        // "index" is a metadata column (not in the table schema); "id" and "data" are data columns.
+        val df = spark.readStream.table(tbl).select("id", "data", "index")
+        val q = df.writeStream
+          .format("memory")
+          .queryName("result_56132")
+          .option("checkpointLocation", checkpointDir.getCanonicalPath)
+          .start()
+        try {
+          q.processAllAvailable()
+          val result = spark.table("result_56132")
+          // Verify data columns arrive correctly and index (metadata) is non-null.
+          checkAnswer(result.select("id", "data").orderBy("id"),
+            Seq(Row(1, "a"), Row(2, "b"), Row(3, "c")))
+          assert(result.select("index").collect().forall(!_.isNullAt(0)),
+            "index metadata column should be non-null in streaming output")
+        } finally {
+          q.stop()
+        }
+      }
+    }
+  }
+
   test("SPARK-43123: Metadata column related field metadata should not be leaked to catalogs") {
     withTable(tbl, "testcat.target") {
       prepareTable()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ProcedureSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ProcedureSuite.scala
index 11bce7afb6545..f6b0dae9b362f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ProcedureSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ProcedureSuite.scala
@@ -222,6 +222,18 @@ class ProcedureSuite extends SharedSparkSession with BeforeAndAfter {
     }
   }
 
+  test("PATH enabled: unqualified CALL skips missing candidate and keeps searching") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      try {
+        catalog("cat2").createProcedure(Identifier.of(Array("ns_hit"), "sum"), UnboundLongSum)
+        sql("SET PATH = cat.ns_miss, cat2.ns_hit")
+        checkAnswer(sql("CALL sum(1, 2)"), Row(3L) :: Nil)
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+      }
+    }
+  }
+
   test("required parameter not found") {
     catalog.createProcedure(Identifier.of(Array("ns"), "sum"), UnboundSum)
     checkError(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTableNetChangesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTableNetChangesSuite.scala
new file mode 100644
index 0000000000000..6ed5070e4f54c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTableNetChangesSuite.scala
@@ -0,0 +1,434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.scalatest.BeforeAndAfterEach
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{
+  ChangelogProperties, Column, Identifier, InMemoryChangelogCatalog}
+import org.apache.spark.sql.connector.catalog.Changelog.{
+  CHANGE_TYPE_DELETE, CHANGE_TYPE_INSERT, CHANGE_TYPE_UPDATE_POSTIMAGE,
+  CHANGE_TYPE_UPDATE_PREIMAGE}
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{LongType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Shared test bodies for the `netChanges` deduplication mode handled by
+ * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]].
+ *
+ * Concrete subclasses fix the [[computeUpdates]] and
+ * [[representsUpdateAsDeleteAndInsert]] flags. Test bodies use
+ * [[computedPreUpdateLabel]] / [[computedPostUpdateLabel]] in their expected outputs.
+ *
+ * Setup convention: every test runs against an in-memory connector configured
+ * with `containsIntermediateChanges = true` and
+ * `containsCarryoverRows = false`, which means
+ *   - netChanges is enabled;
+ *   - carry-over removal is disabled (so the test directly controls events).
+ *
+ * When `representsUpdateAsDeleteAndInsert = true` AND `computeUpdates = true`, update
+ * detection runs upstream of netChanges, exercising the chained pipeline. The
+ * `addUpdatePre` / `addUpdatePost` helpers then emit raw `delete` / `insert` events
+ * (decomposed updates) which update detection relabels back to update pre/post before
+ * netChanges sees them. Output assertions stay identical because both paths produce
+ * the same `_change_type` labels at the netChanges input.
+ */
+trait ResolveChangelogTableNetChangesTestsBase
+    extends QueryTest
+    with SharedSparkSession
+    with BeforeAndAfterEach {
+
+  /**
+   * Value of the user-facing CDC option `computeUpdates` that this test run
+   * exercises. Concrete subclasses pin this to `true` or `false`.
+   */
+  protected def computeUpdates: Boolean
+
+  /**
+   * Value of the connector capability `representsUpdateAsDeleteAndInsert`. When `true`
+   * (combined with `computeUpdates = true`), update detection runs upstream of netChanges
+   * and the test helpers `addUpdatePre` / `addUpdatePost` emit decomposed `delete` /
+   * `insert` events instead of native pre/post events.
+   */
+  protected def representsUpdateAsDeleteAndInsert: Boolean = false
+
+  private val catalogName = "cdc_netchanges_catalog"
+  private val testTableName = "events"
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set(
+      s"spark.sql.catalog.$catalogName",
+      classOf[InMemoryChangelogCatalog].getName)
+  }
+
+  override def beforeEach(): Unit = {
+    super.beforeEach()
+    val cat = catalog
+    if (cat.tableExists(ident)) cat.dropTable(ident)
+    cat.clearChangeRows(ident)
+    cat.createTable(
+      ident,
+      Array(
+        Column.create("id", LongType),
+        Column.create("name", StringType),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+    cat.setChangelogProperties(ident, ChangelogProperties(
+      containsIntermediateChanges = true,
+      containsCarryoverRows = false,
+      representsUpdateAsDeleteAndInsert = representsUpdateAsDeleteAndInsert,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+  }
+
+  private def catalog: InMemoryChangelogCatalog =
+    spark.sessionState.catalogManager
+      .catalog(catalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+
+  private def ident = Identifier.of(Array.empty, testTableName)
+
+  // ---------------------------------------------------------------------------
+  // Input helpers
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Builds a single change row matching the table schema
+   * `(id, name, row_commit_version, _change_type, _commit_version, _commit_timestamp)`.
+   * `row_commit_version` is set to `commitVersion`; these tests do not exercise
+   * carry-over removal, so the rcv value does not matter for assertions.
+   */
+  private def cdcEntry(
+      id: Long, name: String, changeType: String, commitVersion: Long): InternalRow = {
+    InternalRow(
+      id,
+      UTF8String.fromString(name),
+      commitVersion,
+      UTF8String.fromString(changeType),
+      commitVersion,
+      0L)
+  }
+
+  private def addInsert(commitVersion: Long, id: Long, name: String): Unit =
+    catalog.addChangeRows(ident, Seq(cdcEntry(id, name, CHANGE_TYPE_INSERT, commitVersion)))
+
+  private def addDelete(commitVersion: Long, id: Long, name: String): Unit =
+    catalog.addChangeRows(ident, Seq(cdcEntry(id, name, CHANGE_TYPE_DELETE, commitVersion)))
+
+  private def addUpdatePre(commitVersion: Long, id: Long, name: String): Unit = {
+    val changeType =
+      if (representsUpdateAsDeleteAndInsert) CHANGE_TYPE_DELETE
+      else CHANGE_TYPE_UPDATE_PREIMAGE
+    catalog.addChangeRows(ident, Seq(cdcEntry(id, name, changeType, commitVersion)))
+  }
+
+  private def addUpdatePost(commitVersion: Long, id: Long, name: String): Unit = {
+    val changeType =
+      if (representsUpdateAsDeleteAndInsert) CHANGE_TYPE_INSERT
+      else CHANGE_TYPE_UPDATE_POSTIMAGE
+    catalog.addChangeRows(ident, Seq(cdcEntry(id, name, changeType, commitVersion)))
+  }
+
+  // ---------------------------------------------------------------------------
+  // Expected output helpers
+  // ---------------------------------------------------------------------------
+
+  private def expectInsert(version: Long, id: Long, name: String): Row =
+    Row(id, name, CHANGE_TYPE_INSERT, version)
+
+  private def expectDelete(version: Long, id: Long, name: String): Row =
+    Row(id, name, CHANGE_TYPE_DELETE, version)
+
+  /**
+   * Mode-dependent target label for the FIRST emitted row of a partition where
+   * `existedBefore=true, existsAfter=true`. Mirrors the production rule's
+   * `computedPreUpdateLabel` selection: `update_preimage` under
+   * `computeUpdates = true`, `delete` under `computeUpdates = false`.
+   */
+  private def computedPreUpdateLabel: String =
+    if (computeUpdates) CHANGE_TYPE_UPDATE_PREIMAGE else CHANGE_TYPE_DELETE
+
+  /**
+   * Mode-dependent target label for the LAST emitted row of a partition where
+   * `existedBefore=true, existsAfter=true`. Mirrors the production rule's
+   * `computedPostUpdateLabel` selection: `update_postimage` under
+   * `computeUpdates = true`, `insert` under `computeUpdates = false`.
+   */
+  private def computedPostUpdateLabel: String =
+    if (computeUpdates) CHANGE_TYPE_UPDATE_POSTIMAGE else CHANGE_TYPE_INSERT
+
+  // ---------------------------------------------------------------------------
+  // Query helper
+  // ---------------------------------------------------------------------------
+
+  private def runNetChanges(fromV: Long, toV: Long): DataFrame =
+    sql(
+      s"SELECT id, name, _change_type, _commit_version " +
+      s"FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION $fromV TO VERSION $toV " +
+      s"WITH (deduplicationMode = 'netChanges', computeUpdates = '$computeUpdates')")
+
+  // ===========================================================================
+  // Single-event: a lone insert or delete passes through netChanges
+  // ===========================================================================
+
+  test("single insert survives netChanges (wide range FROM 1 TO 10)") {
+    addInsert(commitVersion = 3L, id = 1L, name = "Alice")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(expectInsert(3L, 1L, "Alice")))
+  }
+
+  test("single insert survives netChanges (single-version range FROM 3 TO 3)") {
+    addInsert(commitVersion = 3L, id = 1L, name = "Alice")
+    checkAnswer(
+      runNetChanges(fromV = 3, toV = 3),
+      Seq(expectInsert(3L, 1L, "Alice")))
+  }
+
+  test("single delete survives netChanges (wide range FROM 1 TO 10)") {
+    addDelete(commitVersion = 3L, id = 1L, name = "Alice")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(expectDelete(3L, 1L, "Alice")))
+  }
+
+  test("single delete survives netChanges (single-version range FROM 3 TO 3)") {
+    addDelete(commitVersion = 3L, id = 1L, name = "Alice")
+    checkAnswer(
+      runNetChanges(fromV = 3, toV = 3),
+      Seq(expectDelete(3L, 1L, "Alice")))
+  }
+
+  // ===========================================================================
+  // Full matrix: all 9 (first_change_type, last_change_type) cells
+  // ===========================================================================
+
+  test("matrix: (insert, delete) cancels out") {
+    addInsert(commitVersion = 2, id = 1L, name = "Alice")
+    addDelete(commitVersion = 5, id = 1L, name = "Alice")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq.empty[Row])
+  }
+
+  test("matrix: (insert, insert) emits the last insert") {
+    // Lifecycle: insert(2), delete(3), re-insert(5).
+    addInsert(commitVersion = 2, id = 1L, name = "Alice")
+    addDelete(commitVersion = 3, id = 1L, name = "Alice")
+    addInsert(commitVersion = 5, id = 1L, name = "Alice_v2")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(expectInsert(5L, 1L, "Alice_v2")))
+  }
+
+  test("matrix: (insert, update_post) emits last as insert") {
+    // Lifecycle: insert(2), update_pre/post(5).
+    addInsert(commitVersion = 2, id = 1L, name = "Alice")
+    addUpdatePre(commitVersion = 5, id = 1L, name = "Alice")
+    addUpdatePost(commitVersion = 5, id = 1L, name = "Alice_v2")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(expectInsert(5L, 1L, "Alice_v2")))
+  }
+
+  test("matrix: (update_pre, update_post) emits PRE + POST") {
+    // Lifecycle: update_pre/post(3) -- pure UPDATE(s) case.
+    addUpdatePre(commitVersion = 3, id = 1L, name = "Alice")
+    addUpdatePost(commitVersion = 3, id = 1L, name = "Alice_v2")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(
+        Row(1L, "Alice", computedPreUpdateLabel, 3L),
+        Row(1L, "Alice_v2", computedPostUpdateLabel, 3L)))
+  }
+
+  test("matrix: (update_pre, insert) emits PRE + POST") {
+    // Lifecycle: update_pre/post(2), delete(3), re-insert(5).
+    addUpdatePre(commitVersion = 2, id = 1L, name = "Alice")
+    addUpdatePost(commitVersion = 2, id = 1L, name = "Alice_v2")
+    addDelete(commitVersion = 3, id = 1L, name = "Alice_v2")
+    addInsert(commitVersion = 5, id = 1L, name = "Alice_resurrected")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(
+        Row(1L, "Alice", computedPreUpdateLabel, 2L),
+        Row(1L, "Alice_resurrected", computedPostUpdateLabel, 5L)))
+  }
+
+  test("matrix: (delete, update_post) emits PRE + POST") {
+    // Lifecycle: delete(2), insert(3), update_pre/post(5).
+    addDelete(commitVersion = 2, id = 1L, name = "Alice")
+    addInsert(commitVersion = 3, id = 1L, name = "Alice_v2")
+    addUpdatePre(commitVersion = 5, id = 1L, name = "Alice_v2")
+    addUpdatePost(commitVersion = 5, id = 1L, name = "Alice_v3")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(
+        Row(1L, "Alice", computedPreUpdateLabel, 2L),
+        Row(1L, "Alice_v3", computedPostUpdateLabel, 5L)))
+  }
+
+  test("matrix: (delete, insert) emits PRE + POST") {
+    // Lifecycle: delete(2), re-insert(5) -- raw delete + insert across versions.
+    addDelete(commitVersion = 2, id = 1L, name = "Alice")
+    addInsert(commitVersion = 5, id = 1L, name = "Alice_resurrected")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(
+        Row(1L, "Alice", computedPreUpdateLabel, 2L),
+        Row(1L, "Alice_resurrected", computedPostUpdateLabel, 5L)))
+  }
+
+  test("matrix: (update_pre, delete) emits first as delete") {
+    // Lifecycle: update_pre/post(3), delete(5).
+    addUpdatePre(commitVersion = 3, id = 1L, name = "Alice")
+    addUpdatePost(commitVersion = 3, id = 1L, name = "Alice_v2")
+    addDelete(commitVersion = 5, id = 1L, name = "Alice_v2")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(expectDelete(3L, 1L, "Alice")))
+  }
+
+  test("matrix: (delete, delete) emits the first delete") {
+    // Lifecycle: delete(2), insert(3), delete(5).
+    addDelete(commitVersion = 2, id = 1L, name = "Alice")
+    addInsert(commitVersion = 3, id = 1L, name = "Alice_v2")
+    addDelete(commitVersion = 5, id = 1L, name = "Alice_v2")
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(expectDelete(2L, 1L, "Alice")))
+  }
+
+  // ===========================================================================
+  // Range-narrowing: events outside the requested range must not show up
+  // ===========================================================================
+
+  test("range narrowing: only events inside [from, to] reach netChanges") {
+    // Lifecycle: insert(v2) -- update_pre/post(v5) -- delete(v8). The narrow
+    // query [v3, v6] should see only the update at v5, which collapses to a
+    // single PRE + POST pair.
+    addInsert(commitVersion = 2, id = 1L, name = "Alice")
+    addUpdatePre(commitVersion = 5, id = 1L, name = "Alice")
+    addUpdatePost(commitVersion = 5, id = 1L, name = "Alice_v2")
+    addDelete(commitVersion = 8, id = 1L, name = "Alice_v2")
+
+    checkAnswer(
+      runNetChanges(fromV = 3, toV = 6),
+      Seq(
+        Row(1L, "Alice", computedPreUpdateLabel, 5L),
+        Row(1L, "Alice_v2", computedPostUpdateLabel, 5L)))
+  }
+
+  // ===========================================================================
+  // Multi-rowId: each rowId's lifecycle collapses independently
+  // ===========================================================================
+
+  test("multi-rowId table lifecycle: each rowId collapses independently") {
+    // v1: 4 inserts.
+    addInsert(commitVersion = 1, id = 1L, name = "Alice")
+    addInsert(commitVersion = 1, id = 2L, name = "Bob")
+    addInsert(commitVersion = 1, id = 3L, name = "Carol")
+    addInsert(commitVersion = 1, id = 4L, name = "Dave")
+
+    // v2: update id=3 (emitted as native pre/post pair by the test connector).
+    addUpdatePre(commitVersion = 2, id = 3L, name = "Carol")
+    addUpdatePost(commitVersion = 2, id = 3L, name = "Carol_v2")
+
+    // v3: 2 inserts.
+    addInsert(commitVersion = 3, id = 5L, name = "Eve")
+    addInsert(commitVersion = 3, id = 6L, name = "Frank")
+
+    // v4: 2 deletes.
+    addDelete(commitVersion = 4, id = 1L, name = "Alice")
+    addDelete(commitVersion = 4, id = 2L, name = "Bob")
+
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 4),
+      Seq(
+        expectInsert(2L, 3L, "Carol_v2"),  // id=3: insert + update -> last as insert
+        expectInsert(1L, 4L, "Dave"),      // id=4: lone insert
+        expectInsert(3L, 5L, "Eve"),       // id=5: lone insert
+        expectInsert(3L, 6L, "Frank")))    // id=6: lone insert
+  }
+
+  test("multi-rowId hitting different mode-dependent cells in one query") {
+    addDelete(commitVersion = 2, id = 1L, name = "Alice")
+    addInsert(commitVersion = 5, id = 1L, name = "Alice_resurrected")
+
+    addUpdatePre(commitVersion = 3, id = 2L, name = "Bob")
+    addUpdatePost(commitVersion = 3, id = 2L, name = "Bob_v2")
+
+    addInsert(commitVersion = 4, id = 3L, name = "Carol")
+    addDelete(commitVersion = 6, id = 3L, name = "Carol")
+
+    checkAnswer(
+      runNetChanges(fromV = 1, toV = 10),
+      Seq(
+        // id=1: (delete, insert) -- first + last with mode-dependent labels.
+        Row(1L, "Alice", computedPreUpdateLabel, 2L),
+        Row(1L, "Alice_resurrected", computedPostUpdateLabel, 5L),
+        // id=2: (update_pre, update_post) -- PRE + POST with mode-dependent labels.
+        Row(2L, "Bob", computedPreUpdateLabel, 3L),
+        Row(2L, "Bob_v2", computedPostUpdateLabel, 3L)
+        // id=3: (insert, delete) -- cancel, no rows.
+      ))
+  }
+}
+
+/**
+ * Runs the netChanges test bodies with `computeUpdates = true`:
+ * `existedBefore=true, existsAfter=true` partitions emit `update_preimage` +
+ * `update_postimage`.
+ */
+class ResolveChangelogTableNetChangesWithComputeUpdatesSuite
+    extends ResolveChangelogTableNetChangesTestsBase {
+  override protected def computeUpdates: Boolean = true
+}
+
+/**
+ * Runs the netChanges test bodies with `computeUpdates = false`:
+ * `existedBefore=true, existsAfter=true` partitions emit `delete` + `insert`
+ * (per SQL Ref Spec footnote).
+ */
+class ResolveChangelogTableNetChangesWithoutComputeUpdatesSuite
+    extends ResolveChangelogTableNetChangesTestsBase {
+  override protected def computeUpdates: Boolean = false
+}
+
+/**
+ * Runs the netChanges test bodies against a connector with
+ * `representsUpdateAsDeleteAndInsert = true` and `computeUpdates = true`. Update
+ * detection runs upstream of netChanges, exercising the chained post-processing
+ * pipeline end-to-end. The `addUpdatePre` / `addUpdatePost` helpers emit decomposed
+ * `delete` / `insert` events that update detection relabels back to update pre/post
+ * before netChanges sees them, so the same expected outputs hold.
+ */
+class ResolveChangelogTableNetChangesWithUpdateDetectionSuite
+    extends ResolveChangelogTableNetChangesTestsBase {
+  override protected def computeUpdates: Boolean = true
+  override protected def representsUpdateAsDeleteAndInsert: Boolean = true
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala
new file mode 100644
index 0000000000000..c48ced72a15cc
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala
@@ -0,0 +1,982 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.scalatest.BeforeAndAfterEach
+
+import org.apache.spark.SparkRuntimeException
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
+import org.apache.spark.sql.connector.catalog.{
+  ChangelogProperties, Column, Identifier, InMemoryChangelogCatalog}
+import org.apache.spark.sql.connector.catalog.Changelog.{
+  CHANGE_TYPE_DELETE, CHANGE_TYPE_INSERT, CHANGE_TYPE_UPDATE_POSTIMAGE,
+  CHANGE_TYPE_UPDATE_PREIMAGE}
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.execution.datasources.v2.ChangelogTable
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{
+  BinaryType, BooleanType, DoubleType, LongType, StringType, StructField, StructType}
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Tests for [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]] using the
+ * in-memory changelog catalog. These tests don't depend on Delta or any specific connector;
+ * they directly control what the connector "returns" by populating the in-memory changelog
+ * with hand-crafted change rows.
+ *
+ * Each test sets up [[ChangelogProperties]] on the catalog to enable specific post-processing
+ * paths (carry-over removal, update detection) and then verifies that Spark's analyzer rule
+ * correctly transforms the plan and produces the expected output.
+ */
+class ResolveChangelogTablePostProcessingSuite
+    extends QueryTest
+    with SharedSparkSession
+    with BeforeAndAfterEach {
+
+  private val catalogName = "cdc_test_catalog"
+  private val testTableName = "events"
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set(
+      s"spark.sql.catalog.$catalogName",
+      classOf[InMemoryChangelogCatalog].getName)
+  }
+
+  override def beforeEach(): Unit = {
+    super.beforeEach()
+    val cat = catalog
+    val ident = Identifier.of(Array.empty, testTableName)
+    if (cat.tableExists(ident)) cat.dropTable(ident)
+    cat.clearChangeRows(ident)
+    cat.setChangelogProperties(ident, ChangelogProperties())
+    cat.createTable(
+      ident,
+      Array(
+        Column.create("id", LongType),
+        Column.create("name", StringType),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+  }
+
+  private def catalog: InMemoryChangelogCatalog = {
+    spark.sessionState.catalogManager
+      .catalog(catalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+  }
+
+  private def ident = Identifier.of(Array.empty, testTableName)
+
+  /**
+   * Helper to create a change row matching schema
+   * (id, name, row_commit_version, _change_type, _commit_version, _commit_timestamp).
+   *
+   * `rowCommitVersion` follows Delta row-tracking semantics: carry-over pairs (CoW-rewritten
+   * unchanged rows) share the same value on both sides; real updates carry the OLD value on
+   * the delete side and the NEW value on the insert side. Defaults to `commitVersion` for
+   * tests that don't exercise carry-over removal.
+   */
+  private def changeRow(
+      id: Long,
+      name: String,
+      changeType: String,
+      commitVersion: Long,
+      rowCommitVersion: Long = -1L,
+      commitTimestamp: Long = 0L): InternalRow = {
+    val rcv = if (rowCommitVersion == -1L) commitVersion else rowCommitVersion
+    InternalRow(
+      id,
+      UTF8String.fromString(name),
+      rcv,
+      UTF8String.fromString(changeType),
+      commitVersion,
+      commitTimestamp)
+  }
+
+  // ===========================================================================
+  // Carry-Over Removal
+  // ===========================================================================
+
+  test("carry-over removal drops identical delete+insert pairs") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // v1: insert Alice and Bob (rcv=1 each)
+    // v2: real delete Alice (preimage carries old rcv=1);
+    //     carry-over for Bob (CoW, rcv unchanged on both sides)
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),  // carry-over
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L))) // carry-over (same rcv)
+
+    checkAnswer(
+      sql(
+        s"SELECT id, name, _change_type, _commit_version " +
+        s"FROM $catalogName.$testTableName CHANGES FROM VERSION 1 TO VERSION 2"),
+      Seq(
+        Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+        Row(2L, "Bob", CHANGE_TYPE_INSERT, 1L),
+        Row(1L, "Alice", CHANGE_TYPE_DELETE, 2L)))
+  }
+
+  test("deduplicationMode=none keeps all carry-over rows") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L)))
+
+    checkAnswer(
+      sql(
+        s"SELECT id FROM $catalogName.$testTableName " +
+        s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (deduplicationMode = 'none')"),
+      Seq(Row(1L), Row(2L), Row(2L)))
+  }
+
+  test("NULL rowVersion on one side is NOT silently dropped as carry-over") {
+    // Regression for a NULL-safety hole: min/max skip NULLs, so _min_rv = _max_rv alone
+    // would match a pair with one NULL and one non-null rowVersion. The _rv_cnt = 2
+    // clause in the carry-over filter prevents that.
+    //
+    // The fixture table here declares `row_commit_version` as nullable so the optimizer
+    // is not allowed to fold IsNull(non-nullable-col) to false; the NULL is a legitimate
+    // value the guard must defend against.
+    val nullableRcvTable = "events_nullable_rcv"
+    val nullableIdent = Identifier.of(Array.empty, nullableRcvTable)
+    val cat = catalog
+    if (cat.tableExists(nullableIdent)) cat.dropTable(nullableIdent)
+    cat.clearChangeRows(nullableIdent)
+    cat.createTable(
+      nullableIdent,
+      Array(
+        Column.create("id", LongType),
+        Column.create("name", StringType),
+        Column.create("row_commit_version", LongType, true)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+    cat.setChangelogProperties(nullableIdent, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    cat.addChangeRows(nullableIdent, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      // v2: one side has NULL rowVersion (buggy connector), the other has a real value.
+      InternalRow(1L, UTF8String.fromString("Alice"), null,
+        UTF8String.fromString(CHANGE_TYPE_DELETE), 2L, 0L),
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 5L)))
+
+    checkAnswer(
+      sql(s"SELECT id, name, _change_type, _commit_version " +
+          s"FROM $catalogName.$nullableRcvTable CHANGES FROM VERSION 1 TO VERSION 2"),
+      Seq(
+        Row(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+        Row(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+        Row(1L, "Alice", CHANGE_TYPE_INSERT, 2L)))
+  }
+
+  // ===========================================================================
+  // Update Detection
+  // ===========================================================================
+
+  test("update detection relabels delete+insert with different data as update") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = false,  // no carry-overs in this test
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      // v2: Alice -> Robert (delete old, insert new)
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Robert", CHANGE_TYPE_INSERT, 2L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type, _commit_version " +
+      s"FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .orderBy("_commit_version", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}")
+
+    assert(descs.contains("1:Alice:insert"), s"v1 insert. Got: ${descs.mkString(",")}")
+    assert(descs.contains("1:Alice:update_preimage"))
+    assert(descs.contains("1:Robert:update_postimage"))
+    // No raw delete/insert at v2
+    assert(!descs.contains("1:Alice:delete"))
+    assert(!descs.contains("1:Robert:insert"))
+  }
+
+  test("delete and insert in different versions are NOT labeled as update") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 3L)))
+
+    val rows = sql(
+      s"SELECT _change_type, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 3 " +
+      s"WITH (computeUpdates = 'true', deduplicationMode = 'none')")
+      .collect()
+
+    assert(!rows.exists(_.getString(0).contains("update_")),
+      "Delete and insert in different versions should not be labeled as update")
+  }
+
+  // ===========================================================================
+  // Composite rowId: partitioning uses every rowId column
+  // ===========================================================================
+  //
+  // With a composite rowId such as Seq("id", "name"), the (rowId, _commit_version)
+  // window partition must include BOTH columns. A regression that drops one of the
+  // rowId columns would either falsely merge two different row identities into one
+  // partition (silently mislabeling unrelated delete/insert pairs as updates) or
+  // trip the UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION runtime guard.
+
+  test("update detection with composite rowId keeps different (id, name) tuples raw") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id", "name"),
+      rowVersionName = Some("row_commit_version")))
+
+    // delete (1, Alice) and insert (1, Bob) at v2. These are DIFFERENT composite
+    // rowIds; they must NOT be relabeled as update.
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Bob", CHANGE_TYPE_INSERT, 2L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}").toSet
+
+    assert(descs == Set("1:Alice:delete", "1:Bob:insert"),
+      s"Composite rowId must keep different (id, name) tuples raw. Got: $descs")
+  }
+
+  test("carry-over removal with composite rowId removes pairs per (id, name) tuple") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id", "name"),
+      rowVersionName = Some("row_commit_version")))
+
+    // Two independent carry-over pairs at v2, both with id=1 but different names.
+    // With correct composite-rowId partitioning, each pair lives in its own
+    // (id, name, _commit_version) partition, has _del_cnt=1 / _ins_cnt=1 and equal
+    // _min_rv / _max_rv, and gets dropped. With broken (id-only) partitioning, the
+    // four rows would collapse into one partition with _del_cnt=2 / _ins_cnt=2 and
+    // the carry-over filter (which requires =1) would keep them all.
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(1L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L),
+      changeRow(1L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(1L, "Bob", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type, _commit_version " +
+      s"FROM $catalogName.$testTableName CHANGES FROM VERSION 2 TO VERSION 2")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}")
+    assert(rows.isEmpty,
+      s"Both Alice and Bob carry-over pairs at v2 should be removed. Got: ${descs.mkString(",")}")
+  }
+
+  // ===========================================================================
+  // No row identity: post-processing skipped
+  // ===========================================================================
+
+  test("no capability flags -> post-processing not injected in plan") {
+    // Default ChangelogProperties has no capability flags set; the rule sees nothing to do.
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L)))
+
+    val df = sql(
+      s"SELECT * FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+
+    val plan = df.queryExecution.analyzed.treeString
+    assert(!plan.contains("__spark_cdc_del_cnt"),
+      s"Plan must not contain post-processing window helpers. Plan:\n$plan")
+    assert(!plan.contains("__spark_cdc_ins_cnt"),
+      s"Plan must not contain post-processing window helpers. Plan:\n$plan")
+  }
+
+  test("streaming without post-processing options passes through") {
+    // Streaming reads with no capability flags on the connector and no
+    // post-processing options must resolve without the rule throwing.
+    val df = spark.readStream
+      .option("startingVersion", "1")
+      .changes(s"$catalogName.$testTableName")
+    val analyzed = df.queryExecution.analyzed
+    val plan = analyzed.treeString
+    assert(!plan.contains("__spark_cdc_del_cnt"),
+      s"Streaming plan must not contain post-processing helpers. Plan:\n$plan")
+
+    // Positive assertion: the rule actually fired on the streaming relation. Without this,
+    // a regression that deletes the streaming arm of `ResolveChangelogTable.apply` would
+    // also pass the absence-of-helpers check above.
+    val tableResolved = analyzed.collectFirst {
+      case rel: StreamingRelationV2 if rel.table.isInstanceOf[ChangelogTable] =>
+        rel.table.asInstanceOf[ChangelogTable].resolved
+    }
+    assert(tableResolved.contains(true),
+      s"Expected ChangelogTable to be marked resolved by the rule. Plan:\n$plan")
+  }
+
+  // The streaming netChanges path is covered by
+  // ResolveChangelogTableStreamingPostProcessingSuite -- not duplicated here, since
+  // this suite focuses on the batch path.
+
+  // ===========================================================================
+  // Combined
+  // ===========================================================================
+
+  test("carry-over removal and update detection combined") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // v1: insert Alice (rcv=1), Bob (rcv=1)
+    // v2: Alice carry-over (CoW, rcv unchanged), Bob real update (old rcv=1, new rcv=2)
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),   // carry-over
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L),   // carry-over
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L), // update preimage
+      changeRow(2L, "Robert", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L))) // update postimage
+
+    val rows = sql(
+      s"SELECT id, name, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .orderBy("_commit_version", "id", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}").toSet
+
+    // v1 inserts
+    assert(descs.contains("1:Alice:insert"))
+    assert(descs.contains("2:Bob:insert"))
+    // Alice carry-over dropped
+    assert(!descs.contains("1:Alice:delete"))
+    // Bob -> Robert as update
+    assert(descs.contains("2:Bob:update_preimage"))
+    assert(descs.contains("2:Robert:update_postimage"))
+    // Should be exactly 4 rows
+    assert(rows.length == 4, s"Expected 4 rows, got ${rows.length}: ${descs.mkString(",")}")
+  }
+
+  // ===========================================================================
+  // computeUpdates default (false) keeps raw delete+insert
+  // ===========================================================================
+
+  test("without computeUpdates, delete+insert with different data stays raw") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      // Alice: carry-over (CoW, rcv unchanged on both sides)
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L),
+      // Bob -> Robert: real change (old rcv on pre, new rcv on post)
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Robert", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L)))
+
+    // Default computeUpdates=false: do NOT relabel, but DO drop carry-overs
+    val rows = sql(
+      s"SELECT id, name, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2")
+      .orderBy("_commit_version", "id", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}")
+
+    assert(descs.contains("2:Bob:delete"), s"Bob delete remains raw. Got: ${descs.mkString(",")}")
+    assert(descs.contains("2:Robert:insert"), "Robert insert remains raw")
+    assert(!descs.exists(_.contains("update_")), "No update_* without computeUpdates")
+    assert(!descs.contains("1:Alice:delete"), "Alice carry-over removed")
+  }
+
+  test("update detection on pure inserts leaves them as inserts") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L)))
+
+    val rows = sql(
+      s"SELECT id, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .collect()
+
+    assert(rows.length == 2)
+    assert(rows.forall(_.getString(1) == CHANGE_TYPE_INSERT),
+      s"Pure inserts must stay 'insert'. Got: ${rows.map(_.getString(1)).mkString(",")}")
+  }
+
+  // ===========================================================================
+  // Keep Carry-over Rows and deduplication flag tests
+  // ===========================================================================
+
+  test("computeUpdates with deduplicationMode=none is rejected on COW connector") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    checkError(
+      intercept[AnalysisException] {
+        sql(s"SELECT * FROM $catalogName.$testTableName " +
+          s"CHANGES FROM VERSION 1 TO VERSION 2 " +
+          s"WITH (computeUpdates = 'true', deduplicationMode = 'none')")
+      },
+      condition = "INVALID_CDC_OPTION.UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL",
+      parameters = Map("changelogName" -> s"$catalogName.${testTableName}_changelog"))
+  }
+
+  test("computeUpdates with deduplicationMode=none is allowed on non-COW connector") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = false,  // MOR-style: no carry-overs possible
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      // v2: Alice -> Robert (delete old, insert new)
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Robert", CHANGE_TYPE_INSERT, 2L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 " +
+      s"WITH (computeUpdates = 'true', deduplicationMode = 'none')")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}")
+    assert(descs.contains("1:Alice:update_preimage"),
+      s"Expected Alice update_preimage. Got: ${descs.mkString(",")}")
+    assert(descs.contains("1:Robert:update_postimage"),
+      s"Expected Robert update_postimage. Got: ${descs.mkString(",")}")
+  }
+
+  // ===========================================================================
+  // Contract enforcement: at most one delete + one insert per (rowId, version)
+  // ===========================================================================
+  //
+  // With `representsUpdateAsDeleteAndInsert = true` and `containsIntermediateChanges = false`,
+  // the `Changelog` contract guarantees at most one logical change per (rowId, _commit_version)
+  // partition. The update-relabel projection enforces this at runtime: if it sees more than one
+  // delete or more than one insert in a partition, it raises
+  // INVALID_CDC_OPTION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION instead of silently
+  // mislabeling extra rows as updates.
+
+  test("update detection raises on multiple inserts for same (rowId, _commit_version)") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // Contract violation: 2 inserts for id=1 at v2.
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Alice2", CHANGE_TYPE_INSERT, 2L),
+      changeRow(1L, "Alice3", CHANGE_TYPE_INSERT, 2L)))
+
+    checkError(
+      intercept[SparkRuntimeException] {
+        sql(s"SELECT * FROM $catalogName.$testTableName " +
+          s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (computeUpdates = 'true')")
+          .collect()
+      },
+      condition = "CHANGELOG_CONTRACT_VIOLATION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION",
+      parameters = Map.empty)
+  }
+
+  test("update detection raises on multiple deletes for same (rowId, _commit_version)") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // Contract violation: 2 deletes for id=1 at v2.
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Alice2", CHANGE_TYPE_DELETE, 2L),
+      changeRow(1L, "Alice3", CHANGE_TYPE_INSERT, 2L)))
+
+    checkError(
+      intercept[SparkRuntimeException] {
+        sql(s"SELECT * FROM $catalogName.$testTableName " +
+          s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (computeUpdates = 'true')")
+          .collect()
+      },
+      condition = "CHANGELOG_CONTRACT_VIOLATION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION",
+      parameters = Map.empty)
+  }
+
+  // ===========================================================================
+  // Range edge cases
+  // ===========================================================================
+
+  test("multiple operations across versions") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      // v1: insert 3 rows (rcv=1 each)
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      // v2: delete Alice (preimage carries old rcv=1); CoW carry-overs for Bob/Charlie
+      //     keep rcv=1 on both sides (row unchanged).
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L),
+      // v3: update Bob -> Robert (old rcv=1, new rcv=3); CoW carry-over for Charlie (rcv=1)
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 3L, rowCommitVersion = 1L),
+      changeRow(2L, "Robert", CHANGE_TYPE_INSERT, 3L, rowCommitVersion = 3L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_DELETE, 3L, rowCommitVersion = 1L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 3L, rowCommitVersion = 1L),
+      // v4: insert Diana (rcv=4)
+      changeRow(4L, "Diana", CHANGE_TYPE_INSERT, 4L, rowCommitVersion = 4L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 4 WITH (computeUpdates = 'true')")
+      .orderBy("_commit_version", "id", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}").toSet
+
+    // v1
+    assert(descs.contains("1:Alice:insert:v1"))
+    assert(descs.contains("2:Bob:insert:v1"))
+    assert(descs.contains("3:Charlie:insert:v1"))
+    // v2
+    assert(descs.contains("1:Alice:delete:v2"))
+    assert(!descs.contains("2:Bob:delete:v2"), "Bob carry-over dropped")
+    assert(!descs.contains("3:Charlie:delete:v2"), "Charlie carry-over dropped")
+    // v3
+    assert(descs.contains("2:Bob:update_preimage:v3"))
+    assert(descs.contains("2:Robert:update_postimage:v3"))
+    assert(!descs.contains("3:Charlie:delete:v3"), "Charlie carry-over dropped in v3")
+    // v4
+    assert(descs.contains("4:Diana:insert:v4"))
+  }
+
+  test("larger insert batch returns all rows") {
+    catalog.addChangeRows(ident, (1 to 5).map(i =>
+      changeRow(i.toLong, ('A' + i - 1).toChar.toString, CHANGE_TYPE_INSERT, 1L)))
+
+    val rows = sql(
+      s"SELECT id, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 1 WITH (deduplicationMode = 'none')")
+      .collect()
+
+    assert(rows.length == 5)
+    assert(rows.forall(_.getString(1) == CHANGE_TYPE_INSERT))
+  }
+
+  test("DELETE all rows: no carry-over inserts at v2") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // v1 inserts carry rcv=1; v2 deletes carry the old rcv=1 (rcv tracks last modification)
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2")
+      .orderBy("_commit_version", "id")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}")
+
+    assert(descs.contains("1:Alice:insert:v1"))
+    assert(descs.contains("2:Bob:insert:v1"))
+    assert(descs.contains("1:Alice:delete:v2"))
+    assert(descs.contains("2:Bob:delete:v2"))
+    assert(!descs.exists(_.contains("insert:v2")), "No inserts at v2")
+  }
+
+  test("UPDATE all rows: every row gets update_pre/postimage, no carry-overs") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // Every v2 row is a real update: delete side carries old rcv=1, insert side new rcv=2.
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice_updated", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L),
+      changeRow(2L, "Bob", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(2L, "Bob_updated", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .orderBy("_commit_version", "id", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}").toSet
+
+    assert(descs.contains("1:Alice:update_preimage:v2"))
+    assert(descs.contains("1:Alice_updated:update_postimage:v2"))
+    assert(descs.contains("2:Bob:update_preimage:v2"))
+    assert(descs.contains("2:Bob_updated:update_postimage:v2"))
+    assert(rows.length == 6, s"Expected 2 inserts + 2 pre + 2 post. Got ${rows.length}")
+  }
+
+  test("append-only workload: all inserts, no carry-over needed") {
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 3L)))
+
+    val rows = sql(
+      s"SELECT id, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 3")
+      .collect()
+
+    assert(rows.length == 3)
+    assert(rows.forall(_.getString(1) == CHANGE_TYPE_INSERT))
+  }
+
+  test("carry-over removal with many rows: only real change remains") {
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    // 10 inserts at v1 (rcv=1 each). At v2: delete row 5; CoW writes 9 carry-over pairs
+    // (rcv unchanged since v1, i.e. rcv=1 on both sides) plus 1 real delete (rcv=1, old).
+    val v1Inserts = (1 to 10).map(i =>
+      changeRow(
+        i.toLong, ('A' + i - 1).toChar.toString, CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L))
+    val v2Carryovers = (1 to 10).filter(_ != 5).flatMap { i =>
+      val name = ('A' + i - 1).toChar.toString
+      Seq(
+        changeRow(i.toLong, name, CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+        changeRow(i.toLong, name, CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L))
+    }
+    val v2RealDelete = Seq(changeRow(5L, "E", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L))
+    catalog.addChangeRows(ident, v1Inserts ++ v2Carryovers ++ v2RealDelete)
+
+    val rows = sql(
+      s"SELECT id, name, _change_type FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 2 TO VERSION 2")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}")
+    assert(rows.length == 1,
+      s"Only 1 real change should remain (9 carry-overs dropped). Got: ${descs.mkString(",")}")
+    assert(descs.contains("5:E:delete"))
+  }
+
+  test("carry-over removal with mixed types (DOUBLE, BOOLEAN, BINARY)") {
+    val mixedTable = "events_mixed"
+    val mixedIdent = Identifier.of(Array.empty, mixedTable)
+    val cat = catalog
+    if (cat.tableExists(mixedIdent)) cat.dropTable(mixedIdent)
+    cat.clearChangeRows(mixedIdent)
+    cat.createTable(
+      mixedIdent,
+      Array(
+        Column.create("id", LongType),
+        Column.create("name", StringType),
+        Column.create("score", DoubleType),
+        Column.create("active", BooleanType),
+        Column.create("payload", BinaryType),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+    cat.setChangelogProperties(mixedIdent, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    def mixedRow(
+        id: Long, name: String, score: Double, active: Boolean, payload: Array[Byte],
+        ct: String, v: Long, rowCommitVersion: Long): InternalRow = {
+      InternalRow(
+        id, UTF8String.fromString(name), score, active, payload, rowCommitVersion,
+        UTF8String.fromString(ct), v, 0L)
+    }
+
+    val alicePayload = Array[Byte](1, 2, 3)
+    val bobPayload = Array[Byte](4, 5, 6)
+
+    cat.addChangeRows(mixedIdent, Seq(
+      mixedRow(
+        1L, "Alice", 95.5, true, alicePayload, CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      mixedRow(
+        2L, "Bob", 87.3, false, bobPayload, CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      // v2: update Alice's score (old rcv=1, new rcv=2); Bob is carry-over (rcv unchanged)
+      mixedRow(
+        1L, "Alice", 95.5, true, alicePayload, CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      mixedRow(
+        1L, "Alice", 99.0, true, alicePayload, CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L),
+      mixedRow(
+        2L, "Bob", 87.3, false, bobPayload, CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      mixedRow(
+        2L, "Bob", 87.3, false, bobPayload, CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 1L)))
+
+    val rows = sql(
+      s"SELECT id, name, score, active, _change_type FROM $catalogName.$mixedTable " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .orderBy("_commit_version", "id", "_change_type")
+      .collect()
+
+    val descs = rows.map(r => s"${r.getLong(0)}:${r.getString(4)}")
+    assert(descs.contains("1:update_preimage"))
+    assert(descs.contains("1:update_postimage"))
+    assert(!descs.contains("2:delete"),
+      s"Bob carry-over must be dropped despite DOUBLE/BOOLEAN/BINARY. Got: " +
+        descs.mkString(","))
+
+    val pre = rows.find(r =>
+      r.getLong(0) == 1L && r.getString(4) == CHANGE_TYPE_UPDATE_PREIMAGE).get
+    val post = rows.find(r =>
+      r.getLong(0) == 1L && r.getString(4) == CHANGE_TYPE_UPDATE_POSTIMAGE).get
+    assert(pre.getDouble(2) == 95.5)
+    assert(post.getDouble(2) == 99.0)
+  }
+
+  // ===========================================================================
+  // Regression: nested rowId + nested rowVersion end-to-end
+  // ===========================================================================
+
+  // End-to-end check that nested rowId paths (e.g. `payload.id`) are resolved on the plan
+  // and threaded through carry-over detection. The pair survives the filter because the
+  // row_commit_version differs across delete/insert, not because of any sibling-field data.
+  test("nested rowId path resolves correctly through carry-over filter") {
+    val nestedTable = "events_nested"
+    val nestedIdent = Identifier.of(Array.empty, nestedTable)
+    val cat = catalog
+    if (cat.tableExists(nestedIdent)) cat.dropTable(nestedIdent)
+    cat.clearChangeRows(nestedIdent)
+
+    val payloadType = StructType(Seq(
+      StructField("id", LongType),
+      StructField("value", StringType)))
+
+    cat.createTable(
+      nestedIdent,
+      Array(
+        Column.create("payload", payloadType),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+
+    cat.setChangelogProperties(nestedIdent, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdPaths = Seq(Seq("payload", "id")),
+      rowVersionName = Some("row_commit_version")))
+
+    def nestedRow(
+        id: Long, value: String, ct: String, v: Long, rowCommitVersion: Long): InternalRow = {
+      InternalRow(
+        InternalRow(id, UTF8String.fromString(value)),
+        rowCommitVersion,
+        UTF8String.fromString(ct), v, 0L)
+    }
+
+    cat.addChangeRows(nestedIdent, Seq(
+      nestedRow(1L, "original", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      // v2 update: rowId same, rowVersion differs (old rcv=1 on preimage, new rcv=2 on postimage)
+      nestedRow(1L, "original", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      nestedRow(1L, "CHANGED", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L)))
+
+    val rows = sql(
+      s"SELECT payload.id AS id, payload.value AS value, _change_type, _commit_version " +
+      s"FROM $catalogName.$nestedTable CHANGES FROM VERSION 1 TO VERSION 2")
+      .orderBy("_commit_version", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}")
+
+    assert(descs.contains("1:original:insert:v1"),
+      s"v1 insert must survive. Got: ${descs.mkString(",")}")
+    assert(descs.contains("1:original:delete:v2"),
+      s"v2 delete must survive (payload.value differs from insert). Got: ${descs.mkString(",")}")
+    assert(descs.contains("1:CHANGED:insert:v2"),
+      s"v2 insert must survive (payload.value differs from delete). Got: ${descs.mkString(",")}")
+    assert(rows.length == 3,
+      s"Expected 3 rows (v1 insert + v2 delete + v2 insert). Got ${rows.length}: " +
+      descs.mkString(","))
+  }
+
+  // ===========================================================================
+  // No-op UPDATE is correctly preserved as update_preimage/postimage
+  // ===========================================================================
+
+  test("no-op UPDATE is labeled as update (row_commit_version differs on pre/post)") {
+    // A no-op UPDATE bumps row_commit_version even when data is byte-identical, so the
+    // delete side carries the OLD rcv and the insert side the NEW rcv. Window post-processing
+    // sees different rowVersions, treats this as a real change, and labels both rows as
+    // update_preimage / update_postimage.
+    catalog.setChangelogProperties(ident, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L, rowCommitVersion = 1L),
+      // v2 no-op update: identical data, but rcv differs (Delta bumps it on any UPDATE)
+      changeRow(1L, "Alice", CHANGE_TYPE_DELETE, 2L, rowCommitVersion = 1L),
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 2L, rowCommitVersion = 2L)))
+
+    val rows = sql(
+      s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')")
+      .orderBy("_commit_version", "_change_type")
+      .collect()
+
+    val descs = rows.map(r =>
+      s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}")
+
+    assert(descs.contains("1:Alice:insert:v1"))
+    assert(descs.contains("1:Alice:update_preimage:v2"),
+      s"No-op UPDATE preimage must be labeled. Got: ${descs.mkString(",")}")
+    assert(descs.contains("1:Alice:update_postimage:v2"),
+      s"No-op UPDATE postimage must be labeled. Got: ${descs.mkString(",")}")
+    assert(rows.length == 3,
+      s"Expected v1 insert + v2 update pre/post = 3 rows. Got ${rows.length}")
+  }
+
+  // ===========================================================================
+  // Baseline (range syntax / connector range filtering -- rule bypassed via
+  // deduplicationMode = 'none'; included as smoke tests for the SQL surface).
+  // ===========================================================================
+
+  test("baseline: single-version range FROM VERSION X TO VERSION X") {
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 1L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 2L)))
+
+    val rows = sql(
+      s"SELECT id, _change_type, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (deduplicationMode = 'none')")
+      .collect()
+
+    assert(rows.length == 1, s"Single version: 1 row. Got ${rows.length}")
+    assert(rows(0).getLong(0) == 3L)
+    assert(rows(0).getString(1) == CHANGE_TYPE_INSERT)
+  }
+
+  test("baseline: EXCLUSIVE start bound skips the start version") {
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 3L)))
+
+    val rows = sql(
+      s"SELECT id, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 EXCLUSIVE TO VERSION 3 " +
+      s"WITH (deduplicationMode = 'none')")
+      .orderBy("_commit_version")
+      .collect()
+
+    assert(!rows.exists(_.getLong(1) == 1L), "v1 must be excluded")
+    assert(rows.exists(_.getLong(0) == 2L), "Bob (v2) included")
+    assert(rows.exists(_.getLong(0) == 3L), "Charlie (v3) included")
+  }
+
+  test("baseline: open-ended range (no TO clause) reads to latest") {
+    catalog.addChangeRows(ident, Seq(
+      changeRow(1L, "Alice", CHANGE_TYPE_INSERT, 1L),
+      changeRow(2L, "Bob", CHANGE_TYPE_INSERT, 2L),
+      changeRow(3L, "Charlie", CHANGE_TYPE_INSERT, 3L)))
+
+    val rows = sql(
+      s"SELECT id, _commit_version FROM $catalogName.$testTableName " +
+      s"CHANGES FROM VERSION 1 WITH (deduplicationMode = 'none')")
+      .orderBy("_commit_version", "id")
+      .collect()
+
+    assert(rows.length == 3, s"Open-ended range should see all 3. Got ${rows.length}")
+    assert(rows.exists(r => r.getLong(0) == 3L && r.getLong(1) == 3L))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTableStreamingPostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTableStreamingPostProcessingSuite.scala
new file mode 100644
index 0000000000000..3dc1cf506d33b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTableStreamingPostProcessingSuite.scala
@@ -0,0 +1,449 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.scalatest.BeforeAndAfterEach
+
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.catalyst.expressions.Inline
+import org.apache.spark.sql.catalyst.plans.logical.{
+  Aggregate, EventTimeWatermark, Filter, Generate, LogicalPlan, Project, TransformWithState}
+import org.apache.spark.sql.connector.catalog.{
+  ChangelogProperties, Column, Identifier, InMemoryChangelogCatalog}
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.LongType
+
+/**
+ * Plan-shape tests for the streaming arm of
+ * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]]. These mirror the batch
+ * checks in [[ResolveChangelogTablePostProcessingSuite]] but assert on the rewritten
+ * logical plan rather than running the streaming query end-to-end (the end-to-end
+ * coverage lives in [[ChangelogEndToEndSuite]]).
+ *
+ * The streaming row-level rewrite is:
+ *   EventTimeWatermark(_commit_timestamp, 0s)
+ *     -> Aggregate keyed by (rowId..., _commit_version, _commit_timestamp)
+ *     -> [Filter (carry-over)]
+ *     -> Generate(Inline(events))
+ *     -> [Project (update relabel)]
+ *     -> Project (drop helper columns)
+ */
+class ResolveChangelogTableStreamingPostProcessingSuite
+    extends QueryTest
+    with SharedSparkSession
+    with BeforeAndAfterEach {
+
+  private val catalogName = "cdc_streaming_pp"
+  private val testTableName = "events"
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set(
+      s"spark.sql.catalog.$catalogName",
+      classOf[InMemoryChangelogCatalog].getName)
+  }
+
+  override def beforeEach(): Unit = {
+    super.beforeEach()
+    val cat = catalog
+    val ident = identifier
+    if (cat.tableExists(ident)) cat.dropTable(ident)
+    cat.clearChangeRows(ident)
+    cat.setChangelogProperties(ident, ChangelogProperties())
+    cat.createTable(
+      ident,
+      Array(
+        Column.create("id", LongType, false),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+  }
+
+  private def catalog: InMemoryChangelogCatalog =
+    spark.sessionState.catalogManager
+      .catalog(catalogName)
+      .asInstanceOf[InMemoryChangelogCatalog]
+
+  private def identifier: Identifier = Identifier.of(Array.empty, testTableName)
+
+  private def streamingDf(opts: (String, String)*): DataFrame = {
+    val reader = spark.readStream.option("startingVersion", "1")
+    opts.foldLeft(reader) { case (r, (k, v)) => r.option(k, v) }
+      .changes(s"$catalogName.$testTableName")
+  }
+
+  private def assertWatermarkOnCommitTimestamp(plan: LogicalPlan): Unit = {
+    val wm = plan.collect { case w: EventTimeWatermark => w }
+    assert(wm.size == 1,
+      s"Expected exactly one EventTimeWatermark; found ${wm.size}. Plan:\n$plan")
+    assert(wm.head.eventTime.name == "_commit_timestamp",
+      s"Watermark must be on `_commit_timestamp` but was on `${wm.head.eventTime.name}`. " +
+        s"Plan:\n$plan")
+    assert(wm.head.delay.months == 0 && wm.head.delay.days == 0 &&
+      wm.head.delay.microseconds == 0L,
+      s"Watermark delay must be zero. Plan:\n$plan")
+  }
+
+  private def assertNoStreamingPostProcessing(plan: LogicalPlan): Unit = {
+    assert(plan.collect { case w: EventTimeWatermark => w }.isEmpty,
+      s"No EventTimeWatermark expected for raw streaming pass-through. Plan:\n$plan")
+    val planStr = plan.treeString
+    assert(!planStr.contains("__spark_cdc_"),
+      s"Helper columns must not appear in pass-through plan. Plan:\n$planStr")
+  }
+
+  private def assertHelperColumnsRemoved(plan: LogicalPlan): Unit = {
+    val outputNames = plan.output.map(_.name).toSet
+    assert(!outputNames.exists(_.startsWith("__spark_cdc_")),
+      s"Helper columns must be dropped before the user-visible output. Output: " +
+        outputNames.mkString(", "))
+  }
+
+  private def assertNoWatermarkMetadataOnOutput(plan: LogicalPlan): Unit = {
+    import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
+    val leaks = plan.output.filter(_.metadata.contains(EventTimeWatermark.delayKey))
+    assert(leaks.isEmpty,
+      s"User-visible output must not carry EventTimeWatermark.delayKey metadata; " +
+        s"found on: ${leaks.map(_.name).mkString(",")}. Plan:\n$plan")
+  }
+
+  private def assertInlineGenerate(plan: LogicalPlan): Unit = {
+    val gens = plan.collect { case g: Generate => g }
+    assert(gens.size == 1,
+      s"Expected exactly one Generate; found ${gens.size}. Plan:\n$plan")
+    assert(gens.head.generator.isInstanceOf[Inline],
+      s"Generate must use Inline. Plan:\n$plan")
+  }
+
+  private def assertContainsNullCommitTimestampGuard(plan: LogicalPlan): Unit = {
+    import org.apache.spark.sql.catalyst.analysis.CdcAssertCommitTimestampNotNull
+    val nullGuards = plan.collect {
+      case f: Filter
+        if f.condition.isInstanceOf[CdcAssertCommitTimestampNotNull] => f
+    }
+    assert(nullGuards.size == 1,
+      s"Expected exactly one CdcAssertCommitTimestampNotNull guard Filter. Plan:\n$plan")
+  }
+
+  // ===========================================================================
+  // Carry-over removal only
+  // ===========================================================================
+
+  test("carry-over removal injects watermark + Aggregate + Filter + Generate") {
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf().queryExecution.analyzed
+    assertWatermarkOnCommitTimestamp(analyzed)
+
+    val aggs = analyzed.collect { case a: Aggregate => a }
+    assert(aggs.size == 1, s"Expected one Aggregate. Plan:\n$analyzed")
+    val groupingNames = aggs.head.groupingExpressions.collect {
+      case ne: org.apache.spark.sql.catalyst.expressions.NamedExpression => ne.name
+    }
+    assert(groupingNames.toSet == Set("id", "_commit_version", "_commit_timestamp"),
+      s"Expected grouping by (id, _commit_version, _commit_timestamp); got $groupingNames")
+
+    // Two Filters: the NULL `_commit_timestamp` guard + the carry-over predicate.
+    val filters = analyzed.collect { case f: Filter => f }
+    assert(filters.size == 2,
+      s"Expected NULL guard + carry-over Filter. Plan:\n$analyzed")
+    assertContainsNullCommitTimestampGuard(analyzed)
+
+    assertInlineGenerate(analyzed)
+    assertHelperColumnsRemoved(analyzed)
+    assertNoWatermarkMetadataOnOutput(analyzed)
+  }
+
+  // ===========================================================================
+  // Update detection only
+  // ===========================================================================
+
+  test("update detection alone injects watermark + Aggregate + Generate + relabel Project") {
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf(
+      "computeUpdates" -> "true",
+      "deduplicationMode" -> "none").queryExecution.analyzed
+    assertWatermarkOnCommitTimestamp(analyzed)
+
+    // No carry-over Filter when only update detection runs -- but the NULL
+    // `_commit_timestamp` guard Filter is always present.
+    val filters = analyzed.collect { case f: Filter => f }
+    assert(filters.size == 1,
+      s"Only the NULL guard Filter is expected for update-detection-only path. " +
+        s"Plan:\n$analyzed")
+    assertContainsNullCommitTimestampGuard(analyzed)
+
+    assertInlineGenerate(analyzed)
+
+    // The relabel Project must reference _change_type (CaseWhen rewrites it).
+    val projects = analyzed.collect { case p: Project => p }
+    assert(projects.exists { p =>
+      p.projectList.exists(
+        _.toString.toLowerCase(java.util.Locale.ROOT).contains("update_preimage"))
+    }, s"Expected a Project that emits `update_preimage`. Plan:\n$analyzed")
+
+    assertHelperColumnsRemoved(analyzed)
+    assertNoWatermarkMetadataOnOutput(analyzed)
+  }
+
+  // ===========================================================================
+  // Both passes
+  // ===========================================================================
+
+  test("carry-over + update detection share a single Aggregate") {
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsCarryoverRows = true,
+      representsUpdateAsDeleteAndInsert = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf("computeUpdates" -> "true").queryExecution.analyzed
+    assertWatermarkOnCommitTimestamp(analyzed)
+
+    val aggs = analyzed.collect { case a: Aggregate => a }
+    assert(aggs.size == 1, s"Should fuse both passes into a single Aggregate. Plan:\n$analyzed")
+
+    // Two Filters: NULL guard + carry-over removal.
+    val filters = analyzed.collect { case f: Filter => f }
+    assert(filters.size == 2,
+      s"Expected NULL guard + carry-over Filter for combined path. Plan:\n$analyzed")
+    assertContainsNullCommitTimestampGuard(analyzed)
+
+    assertInlineGenerate(analyzed)
+    assertHelperColumnsRemoved(analyzed)
+    assertNoWatermarkMetadataOnOutput(analyzed)
+  }
+
+  // ===========================================================================
+  // Net changes
+  // ===========================================================================
+
+  test("netChanges alone injects watermark + TransformWithState") {
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf(
+      "deduplicationMode" -> "netChanges").queryExecution.analyzed
+    assertWatermarkOnCommitTimestamp(analyzed)
+    val tws = analyzed.collect { case t: TransformWithState => t }
+    assert(tws.size == 1,
+      s"Expected exactly one TransformWithState; found ${tws.size}. Plan:\n$analyzed")
+    // Guard against a regression that grouped by the wrong attributes (e.g. omitting a
+    // rowId column or grouping by `_commit_version`) -- the size check alone would still
+    // pass.
+    val groupingNames = tws.head.groupingAttributes.map(_.name)
+    assert(groupingNames == Seq("__spark_cdc_rowid_0"),
+      s"Expected TransformWithState grouping by [__spark_cdc_rowid_0]; got $groupingNames. " +
+        s"Plan:\n$analyzed")
+    assertHelperColumnsRemoved(analyzed)
+    // The auto-injected `EventTimeWatermark` metadata flows through the
+    // `transformWithState` encoder roundtrip on the netChanges-only path. The
+    // rewrite must strip it from the user-visible `_commit_timestamp` so a
+    // downstream user-supplied watermark cannot accidentally interact with our
+    // internal watermark via the global multi-watermark policy.
+    assertNoWatermarkMetadataOnOutput(analyzed)
+  }
+
+  test("netChanges with composite rowId groups by all helper columns") {
+    // Recreate with a two-column rowId so we exercise the rowIdColumn(idx) helper
+    // for idx > 0. The single-rowId test asserts the size-1 case; this guards
+    // against a regression that hard-codes a single helper attribute.
+    val cat = catalog
+    val ident = identifier
+    cat.dropTable(ident)
+    cat.createTable(
+      ident,
+      Array(
+        Column.create("ns", LongType, false),
+        Column.create("id", LongType, false),
+        Column.create("row_commit_version", LongType, false)),
+      Array.empty[Transform],
+      Collections.emptyMap[String, String]())
+    cat.setChangelogProperties(ident, ChangelogProperties(
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("ns", "id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf(
+      "deduplicationMode" -> "netChanges").queryExecution.analyzed
+    assertWatermarkOnCommitTimestamp(analyzed)
+    val tws = analyzed.collect { case t: TransformWithState => t }
+    assert(tws.size == 1, s"Expected one TransformWithState. Plan:\n$analyzed")
+    val groupingNames = tws.head.groupingAttributes.map(_.name)
+    assert(groupingNames == Seq("__spark_cdc_rowid_0", "__spark_cdc_rowid_1"),
+      s"Expected grouping by [__spark_cdc_rowid_0, __spark_cdc_rowid_1]; got $groupingNames. " +
+        s"Plan:\n$analyzed")
+    assertHelperColumnsRemoved(analyzed)
+  }
+
+  test("netChanges + carry-over removal share a single watermark") {
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsCarryoverRows = true,
+      containsIntermediateChanges = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf(
+      "deduplicationMode" -> "netChanges").queryExecution.analyzed
+    val watermarks = analyzed.collect { case w: EventTimeWatermark => w }
+    assert(watermarks.size == 1,
+      s"Combined row-level + netChanges path should share one EventTimeWatermark. " +
+        s"Plan:\n$analyzed")
+    val aggs = analyzed.collect { case a: Aggregate => a }
+    assert(aggs.size == 1,
+      s"Row-level Aggregate should still be present. Plan:\n$analyzed")
+    val tws = analyzed.collect { case t: TransformWithState => t }
+    assert(tws.size == 1,
+      s"netChanges TransformWithState should be on top of the row-level rewrite. " +
+        s"Plan:\n$analyzed")
+    assertHelperColumnsRemoved(analyzed)
+  }
+
+  // ===========================================================================
+  // No post-processing -> no rewrite
+  // ===========================================================================
+
+  test("no post-processing required: raw streaming relation passes through") {
+    // No capability flags set -> no Aggregate, no watermark.
+    val analyzed = streamingDf().queryExecution.analyzed
+    assertNoStreamingPostProcessing(analyzed)
+  }
+
+  test("computeUpdates without representsUpdateAsDeleteAndInsert: no rewrite") {
+    // Connector says updates are already materialized -> nothing to do.
+    val analyzed = streamingDf(
+      "computeUpdates" -> "true").queryExecution.analyzed
+    assertNoStreamingPostProcessing(analyzed)
+  }
+
+  // ===========================================================================
+  // Watermark metadata is internal-only and stripped from user-visible output
+  // ===========================================================================
+
+  test("watermark metadata is stripped from user-visible _commit_timestamp") {
+    import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf().queryExecution.analyzed
+    // Internally, the EventTimeWatermark must still be present.
+    assertWatermarkOnCommitTimestamp(analyzed)
+    // But none of the user-visible output attributes should leak the watermark
+    // metadata; downstream user-supplied watermarks must not interact with our
+    // auto-injected internal watermark via the global multi-watermark policy.
+    val ts = analyzed.output.find(_.name == "_commit_timestamp")
+    assert(ts.isDefined, s"Expected `_commit_timestamp` in output. Plan:\n$analyzed")
+    assert(!ts.get.metadata.contains(EventTimeWatermark.delayKey),
+      s"Watermark metadata leaked to user-visible `_commit_timestamp`. Plan:\n$analyzed")
+  }
+
+  // ===========================================================================
+  // NULL _commit_timestamp guard
+  // ===========================================================================
+
+  test("NULL _commit_timestamp guard Filter is the first operator after the source") {
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version")))
+
+    val analyzed = streamingDf().queryExecution.analyzed
+    import org.apache.spark.sql.catalyst.analysis.CdcAssertCommitTimestampNotNull
+    // The guard must sit BELOW the EventTimeWatermark (we don't want a NULL row to
+    // be considered for watermark advancement at all). Verify by walking the plan
+    // top-down and finding the guard before any Aggregate.
+    val guards = analyzed.collect {
+      case f: Filter if f.condition.isInstanceOf[CdcAssertCommitTimestampNotNull] => f
+    }
+    assert(guards.size == 1, s"Expected exactly one guard. Plan:\n$analyzed")
+    val guard = guards.head
+    val guardChild = guard.child
+    // The guard's child should be the bare relation (or a SubqueryAlias wrapping it),
+    // not the EventTimeWatermark.
+    val isSourceBelowGuard = guardChild match {
+      case _: org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 => true
+      case org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias(_,
+            _: org.apache.spark.sql.catalyst.streaming.StreamingRelationV2) => true
+      case _ => false
+    }
+    assert(isSourceBelowGuard,
+      s"NULL guard Filter should sit directly above the streaming relation. Plan:\n$analyzed")
+  }
+
+  test("NULL guard predicate is not foldable when _commit_timestamp is non-nullable") {
+    import org.apache.spark.sql.catalyst.analysis.CdcAssertCommitTimestampNotNull
+    import org.apache.spark.sql.catalyst.expressions.{IsNull, Literal}
+    import org.apache.spark.sql.catalyst.optimizer.NullPropagation
+    // Streaming plans can't be sent through the batch optimizer (UnsupportedOperationChecker
+    // rejects streaming sources in that path), so we directly exercise the rule that the
+    // reviewer flagged: NullPropagation must not eliminate our predicate even when the
+    // child column is non-nullable. Spark's NullPropagation simplifies `IsNull(c)` and
+    // `AssertNotNull(c)` to constants for non-nullable `c`, but it has no rule for
+    // `CdcAssertCommitTimestampNotNull`, so the predicate stays in place.
+    catalog.setChangelogProperties(identifier, ChangelogProperties(
+      containsCarryoverRows = true,
+      rowIdNames = Seq("id"),
+      rowVersionName = Some("row_commit_version"),
+      commitTimestampNullable = false))
+
+    val analyzed = streamingDf().queryExecution.analyzed
+    val tsAttrAnalyzed = analyzed.collect {
+      case rel: org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 =>
+        rel.output.find(_.name == "_commit_timestamp").get
+    }.head
+    assert(!tsAttrAnalyzed.nullable,
+      s"Test setup expected non-nullable `_commit_timestamp` on the source. Plan:\n$analyzed")
+
+    val guardsBefore = analyzed.collect {
+      case f: Filter if f.condition.isInstanceOf[CdcAssertCommitTimestampNotNull] => f
+    }
+    assert(guardsBefore.size == 1,
+      s"NULL guard must be present in the analyzed plan. Plan:\n$analyzed")
+
+    // Run NullPropagation on the predicate. It should be a no-op because the rule does
+    // not recognize `CdcAssertCommitTimestampNotNull`. (As a sanity check: the same rule
+    // would simplify a plain `IsNull(non-nullable)` to a Boolean literal.)
+    val tsAttr = guardsBefore.head.condition.asInstanceOf[CdcAssertCommitTimestampNotNull].child
+    val analyzedPredicate = guardsBefore.head.condition
+    val tsIsNullPlan = Filter(IsNull(tsAttr), analyzed)
+    val optimizedTsIsNull = NullPropagation(tsIsNullPlan).asInstanceOf[Filter].condition
+    assert(optimizedTsIsNull.isInstanceOf[Literal],
+      s"Sanity check: NullPropagation should fold IsNull(non-nullable) to a literal. " +
+        s"Got: $optimizedTsIsNull")
+
+    val cdcGuardPlan = Filter(analyzedPredicate, analyzed)
+    val optimizedGuard = NullPropagation(cdcGuardPlan).asInstanceOf[Filter].condition
+    assert(optimizedGuard.isInstanceOf[CdcAssertCommitTimestampNotNull],
+      s"NullPropagation must NOT simplify CdcAssertCommitTimestampNotNull. " +
+        s"Got: $optimizedGuard")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/RowLevelOperationSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/RowLevelOperationSuiteBase.scala
index 79387821bf087..0c465969e347c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/RowLevelOperationSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/RowLevelOperationSuiteBase.scala
@@ -28,16 +28,16 @@ import org.apache.spark.sql.catalyst.expressions.{DynamicPruningExpression, Expr
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReplaceData, WriteDelta}
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.METADATA_COL_ATTR_KEY
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Column, Delete, Identifier, InMemoryRowLevelOperationTable, InMemoryRowLevelOperationTableCatalog, Insert, MetadataColumn, Operation, Reinsert, TableInfo, Update, Write}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Column, Delete, Identifier, InMemoryRowLevelOperationTable, InMemoryRowLevelOperationTableCatalog, Insert, MetadataColumn, Operation, Reinsert, Table, TableInfo, Txn, TxnTable, Update, Write}
 import org.apache.spark.sql.connector.expressions.LogicalExpressions.{identity, reference}
 import org.apache.spark.sql.connector.expressions.Transform
-import org.apache.spark.sql.execution.{InSubqueryExec, QueryExecution, SparkPlan}
+import org.apache.spark.sql.connector.write.RowLevelOperationTable
+import org.apache.spark.sql.execution.{InSubqueryExec, SparkPlan}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation, DataSourceV2ScanRelation}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StringType, StructField, StructType}
-import org.apache.spark.sql.util.QueryExecutionListener
 import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.ArrayImplicits._
 
@@ -51,6 +51,7 @@ abstract class RowLevelOperationSuiteBase
   }
 
   after {
+    catalog.nextTxnRejectRegisteredScansAttempt = false
     spark.sessionState.catalogManager.reset()
     spark.sessionState.conf.unsetConf("spark.sql.catalog.cat")
   }
@@ -82,6 +83,7 @@ abstract class RowLevelOperationSuiteBase
   protected val namespace: Array[String] = Array("ns1")
   protected val ident: Identifier = Identifier.of(namespace, "test_table")
   protected val tableNameAsString: String = "cat." + ident.toString
+  protected val sourceNameAsString: String = "cat.ns1.source_table"
 
   protected def extraTableProps: java.util.Map[String, String] = {
     Collections.emptyMap[String, String]
@@ -93,7 +95,10 @@ abstract class RowLevelOperationSuiteBase
   }
 
   protected def table: InMemoryRowLevelOperationTable = {
-    catalog.loadTable(ident).asInstanceOf[InMemoryRowLevelOperationTable]
+    // Use liveTable, not loadTable, because loadTable returns a snapshot copy.
+    // Tests mutate state through this accessor (e.g., `table.increaseVersion()`) and need
+    // those mutations to be observable to subsequent loads.
+    catalog.liveTable(ident).asInstanceOf[InMemoryRowLevelOperationTable]
   }
 
   protected def createTable(schemaString: String): Unit = {
@@ -135,22 +140,28 @@ abstract class RowLevelOperationSuiteBase
 
   // executes an operation and keeps the executed plan
   protected def executeAndKeepPlan(func: => Unit): SparkPlan = {
-    var executedPlan: SparkPlan = null
+    withQueryExecutionsCaptured(spark)(func) match {
+      case Seq(qe) => stripAQEPlan(qe.executedPlan)
+      case other => fail(s"expected only one query execution, but got ${other.size}")
+    }
+  }
 
-    val listener = new QueryExecutionListener {
-      override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
-        executedPlan = qe.executedPlan
-      }
-      override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
+  protected def executeTransaction(func: => Unit): (Txn, Map[String, TxnTable]) = {
+    val tables = withQueryExecutionsCaptured(spark)(func).flatMap { qe =>
+      collectWithSubqueries(qe.executedPlan) {
+        case BatchScanExec(_, _, _, _, table: TxnTable, _) => table
+        case BatchScanExec(_, _, _, _, RowLevelOperationTable(table: TxnTable, _), _) => table
       }
     }
-    spark.listenerManager.register(listener)
-
-    func
-
-    sparkContext.listenerBus.waitUntilEmpty()
+    (catalog.lastTransaction, indexByName(tables))
+  }
 
-    stripAQEPlan(executedPlan)
+  protected def indexByName[T <: Table](tables: Seq[T]): Map[String, T] = {
+    tables.groupBy(_.name).map {
+      case (name, sameNameTables) =>
+        val Seq(table) = sameNameTables.distinct
+        name -> table
+    }
   }
 
   // executes an operation and extracts conditions from ReplaceData or WriteDelta
@@ -158,7 +169,7 @@ abstract class RowLevelOperationSuiteBase
     val Seq(qe) = withQueryExecutionsCaptured(spark)(func)
     qe.optimizedPlan.collectFirst {
       case rd: ReplaceData => (rd.condition, rd.groupFilterCondition)
-      case wd: WriteDelta => (wd.condition, None)
+      case wd: WriteDelta => (wd.condition, wd.groupFilterCondition)
     }.getOrElse(fail("couldn't find row-level operation in optimized plan"))
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala
new file mode 100644
index 0000000000000..9e365c720266a
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog, SupportsNamespaces}
+import org.apache.spark.sql.connector.catalog.functions.{ScalarFunction, UnboundFunction}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{DataType, IntegerType, StringType}
+
+/**
+ * End-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through non-session V2 catalogs.
+ *
+ * Other path tests live in `SetPathSuite` (session catalog) and `ProcedureSuite`
+ * (procedures via CALL). This suite specifically exercises:
+ *   - unqualified table resolution across two V2 catalogs in SET PATH,
+ *   - first-match ordering when both catalogs hold the same name,
+ *   - unqualified V2 function resolution across two V2 catalogs in SET PATH,
+ *   - the negative case where the unqualified name only lives in a catalog
+ *     that is NOT on the path.
+ */
+class SqlPathV2CatalogSuite extends SharedSparkSession {
+
+  private val emptyProps: java.util.Map[String, String] = Collections.emptyMap()
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set("spark.sql.catalog.pathcat", classOf[InMemoryCatalog].getName)
+    spark.conf.set("spark.sql.catalog.pathcat2", classOf[InMemoryCatalog].getName)
+  }
+
+  override def afterAll(): Unit = {
+    try {
+      spark.sessionState.catalogManager.reset()
+      spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat")
+      spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat2")
+    } finally {
+      super.afterAll()
+    }
+  }
+
+  private def v2Catalog(name: String): InMemoryCatalog =
+    spark.sessionState.catalogManager.catalog(name).asInstanceOf[InMemoryCatalog]
+
+  private def createV2Namespace(catalog: String, ns: String): Unit = {
+    v2Catalog(catalog).asInstanceOf[SupportsNamespaces]
+      .createNamespace(Array(ns), emptyProps)
+  }
+
+  private def addV2Function(
+      catalog: String,
+      ns: String,
+      name: String,
+      fn: UnboundFunction): Unit = {
+    v2Catalog(catalog).createFunction(Identifier.of(Array(ns), name), fn)
+  }
+
+  test("V2 catalogs on SET PATH: unqualified table follows first match") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      // pathcat and pathcat2 each have a namespace `ns` and a table `path_v2_t` with
+      // different contents, so we can tell which catalog supplied the row.
+      createV2Namespace("pathcat", "ns")
+      createV2Namespace("pathcat2", "ns")
+      sql("CREATE TABLE pathcat.ns.path_v2_t (id INT) USING foo")
+      sql("INSERT INTO pathcat.ns.path_v2_t VALUES (10)")
+      sql("CREATE TABLE pathcat2.ns.path_v2_t (id INT) USING foo")
+      sql("INSERT INTO pathcat2.ns.path_v2_t VALUES (20)")
+
+      try {
+        sql("SET PATH = pathcat.ns, pathcat2.ns, system.builtin")
+        checkAnswer(sql("SELECT id FROM path_v2_t"), Row(10))
+
+        sql("SET PATH = pathcat2.ns, pathcat.ns, system.builtin")
+        checkAnswer(sql("SELECT id FROM path_v2_t"), Row(20))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TABLE IF EXISTS pathcat.ns.path_v2_t")
+        sql("DROP TABLE IF EXISTS pathcat2.ns.path_v2_t")
+      }
+    }
+  }
+
+  test("V2 catalogs on SET PATH: unqualified table only in a non-path catalog is not found") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      createV2Namespace("pathcat", "ns_only_here")
+      sql("CREATE TABLE pathcat.ns_only_here.hidden_t (id INT) USING foo")
+      try {
+        // Path does not include pathcat.ns_only_here; bare `hidden_t` must not resolve.
+        sql("SET PATH = pathcat2.ns, system.builtin")
+        val e = intercept[AnalysisException] {
+          sql("SELECT id FROM hidden_t").collect()
+        }
+        assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" ||
+            e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"),
+          s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql("DROP TABLE IF EXISTS pathcat.ns_only_here.hidden_t")
+      }
+    }
+  }
+
+  test("V2 catalogs on SET PATH: unqualified function follows first match") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      // Two V2 catalogs each register a `strlen` function under the same name but with
+      // distinguishable return values: pathcat returns the true length, pathcat2 returns
+      // the length times 100. The result distinguishes which catalog supplied the
+      // function for the same argument, so swapping the path order must change the row.
+      createV2Namespace("pathcat", "fns")
+      createV2Namespace("pathcat2", "fns")
+      addV2Function("pathcat", "fns", "strlen", StrLen(StrLenDefault))
+      addV2Function("pathcat2", "fns", "strlen", StrLen(StrLenTimes100))
+      try {
+        sql("SET PATH = pathcat.fns, pathcat2.fns, system.builtin")
+        checkAnswer(sql("SELECT strlen('abc')"), Row(3))
+
+        sql("SET PATH = pathcat2.fns, pathcat.fns, system.builtin")
+        checkAnswer(sql("SELECT strlen('abc')"), Row(300))
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        v2Catalog("pathcat").clearFunctions()
+        v2Catalog("pathcat2").clearFunctions()
+      }
+    }
+  }
+}
+
+/**
+ * A small distinguishable companion to `StrLenDefault` (in `DataSourceV2FunctionSuite.scala`):
+ * returns `s.length * 100` so V2-function resolution tests across catalogs can verify which
+ * catalog supplied the function from the result row alone.
+ */
+case object StrLenTimes100 extends ScalarFunction[Int] {
+  override def inputTypes(): Array[DataType] = Array(StringType)
+  override def resultType(): DataType = IntegerType
+  override def name(): String = "strlen_times_100"
+  override def produceResult(input: InternalRow): Int = input.getString(0).length * 100
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/StreamingTransactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/StreamingTransactionSuite.scala
new file mode 100644
index 0000000000000..d356197fa53c1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/StreamingTransactionSuite.scala
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector
+
+import java.util.Collections
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.connector.catalog.{Aborted, CatalogV2Util, Committed, Identifier, InMemoryBaseTable, InMemoryRowLevelOperationTableCatalog, InMemoryTableCatalog, SharedTablesInMemoryRowLevelOperationTableCatalog, TableInfo}
+import org.apache.spark.sql.execution.streaming.runtime.{MemoryStream, StreamingQueryWrapper}
+import org.apache.spark.sql.streaming.StreamingQuery
+import org.apache.spark.sql.types.StructType
+
+class StreamingTransactionSuite extends RowLevelOperationSuiteBase {
+
+  import testImplicits._
+
+  override def beforeEach(): Unit = {
+    super.beforeEach()
+    spark.conf.set(
+      "spark.sql.catalog.cat",
+      classOf[SharedTablesInMemoryRowLevelOperationTableCatalog].getName)
+  }
+
+  override def afterEach(): Unit = {
+    SharedTablesInMemoryRowLevelOperationTableCatalog.reset()
+    super.afterEach()
+  }
+
+  private def createSimpleTable(schemaString: String): Unit = {
+    val columns = CatalogV2Util.structTypeToV2Columns(StructType.fromDDL(schemaString))
+    val tableInfo = new TableInfo.Builder().withColumns(columns).build()
+    catalog.createTable(ident, tableInfo)
+  }
+
+  private def streamCatalog(query: StreamingQuery): InMemoryRowLevelOperationTableCatalog = {
+    val session = query.asInstanceOf[StreamingQueryWrapper].streamingQuery.sparkSessionForStream
+    session.sessionState.catalogManager.catalog("cat")
+      .asInstanceOf[InMemoryRowLevelOperationTableCatalog]
+  }
+
+  test("streaming write commits a transaction") {
+    createSimpleTable("value INT")
+
+    withTempDir { checkpointDir =>
+      val inputData = MemoryStream[Int]
+
+      val query = inputData.toDF()
+        .writeStream
+        .option("checkpointLocation", checkpointDir.getAbsolutePath)
+        .toTable(tableNameAsString)
+
+      assert(table.version() === "0")
+
+      inputData.addData(1, 2, 3)
+      query.processAllAvailable()
+      query.stop()
+
+      val txn = streamCatalog(query).lastTransaction
+      assert(txn != null, "expected a transaction to have been committed")
+      assert(txn.currentState === Committed)
+      assert(txn.isClosed)
+
+      // Pure streaming append: the write target is not read, source is not a TxnTable.
+      val targetTxnTable = indexByName(txn.catalog.txnTables.values.toSeq)(tableNameAsString)
+      assert(txn.catalog.txnTables.size === 1)
+      assert(targetTxnTable.scanEvents.isEmpty)
+      assert(table.version() === "1")
+
+      // Transaction must be scoped to the streaming session; main session catalog is untouched.
+      assert(catalog.observedTransactions.isEmpty)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(Row(1), Row(2), Row(3)))
+    }
+  }
+
+  test("each micro-batch is an independent transaction") {
+    createSimpleTable("value INT")
+
+    withTempDir { checkpointDir =>
+      val inputData = MemoryStream[Int]
+
+      val query = inputData.toDF()
+        .writeStream
+        .option("checkpointLocation", checkpointDir.getAbsolutePath)
+        .toTable(tableNameAsString)
+
+      assert(table.version() === "0")
+
+      inputData.addData(1, 2, 3)
+      query.processAllAvailable()
+
+      inputData.addData(4, 5, 6)
+      query.processAllAvailable()
+
+      query.stop()
+
+      val sc = streamCatalog(query)
+      assert(sc.observedTransactions.size === 2)
+      assert(sc.observedTransactions.forall(_.currentState === Committed))
+      // Pure streaming append: write target is not read in any micro-batch.
+      assert(sc.observedTransactions.forall { t =>
+        indexByName(t.catalog.txnTables.values.toSeq)(tableNameAsString).scanEvents.isEmpty
+      })
+      // Each committed micro-batch increments the delegate version exactly once.
+      assert(table.version() === "2")
+
+      // Transaction must be scoped to the streaming session; main session catalog is untouched.
+      assert(catalog.observedTransactions.isEmpty)
+
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(Row(1), Row(2), Row(3), Row(4), Row(5), Row(6)))
+    }
+  }
+
+  for (isSelfScan <- Seq(true, false))
+  test("batch read from catalog-backed table inside streaming query is tracked as a " +
+      s"scan event (isSelfScan=$isSelfScan)") {
+    // Target table for the stream.
+    createSimpleTable("value INT")
+
+    // Pick the static (non-streaming) source table. When isSelfScan is true, the stream's
+    // write target is also used as the batch source.
+    val staticSourceName = if (isSelfScan) {
+      tableNameAsString
+    } else {
+      val sourceIdent = Identifier.of(namespace, "source_table")
+      val srcColumns = CatalogV2Util.structTypeToV2Columns(StructType.fromDDL("value INT"))
+      catalog.createTable(sourceIdent, new TableInfo.Builder().withColumns(srcColumns).build())
+      sourceNameAsString
+    }
+    sql(s"INSERT INTO $staticSourceName VALUES (1), (2), (3)")
+    // The INSERT above runs a transaction on the main session catalog; capture the count now
+    // so we can assert the streaming query does not add more.
+    val mainTxnsBefore = catalog.observedTransactions.size
+
+    withTempDir { checkpointDir =>
+      val inputData = MemoryStream[Int]
+
+      // spark.read produces a DataSourceV2Relation (batch), not a streaming source.
+      // UnresolveRelationsInTransaction converts it to V2TableReference each micro-batch so
+      // the transaction-aware catalog can record the scan event.
+      val staticData = spark.read.table(staticSourceName)
+
+      val query = inputData.toDF()
+        .join(staticData, "value")
+        .writeStream
+        .option("checkpointLocation", checkpointDir.getAbsolutePath)
+        .toTable(tableNameAsString)
+
+      // There should be no transaction yet in the cloned session.
+      assert(streamCatalog(query).lastTransaction === null)
+
+      inputData.addData(1, 2, 3)
+      query.processAllAvailable()
+      query.stop()
+
+      val txn = streamCatalog(query).lastTransaction
+      assert(txn != null, "expected a transaction to have been committed")
+      assert(txn.currentState === Committed)
+      assert(txn.isClosed)
+
+      if (isSelfScan) {
+        // Target acts as both write target and batch source.
+        assert(txn.catalog.txnTables.size === 1)
+        val targetTxnTable = indexByName(txn.catalog.txnTables.values.toSeq)(tableNameAsString)
+        assert(targetTxnTable.scanEvents.size === 1)
+      } else {
+        // Both the write target and the batch source participate in the transaction.
+        assert(txn.catalog.txnTables.size === 2)
+        val targetTxnTable = indexByName(txn.catalog.txnTables.values.toSeq)(tableNameAsString)
+        assert(targetTxnTable.scanEvents.isEmpty)
+        // The static source was read exactly once and its scan event was captured.
+        val sourceTxnTable = indexByName(txn.catalog.txnTables.values.toSeq)(sourceNameAsString)
+        assert(sourceTxnTable.scanEvents.size === 1)
+      }
+
+      // Streaming must not add transactions to the main session catalog beyond pre-existing
+      // setup transactions.
+      assert(catalog.observedTransactions.size === mainTxnsBefore)
+
+      // In the self-scan case the target was pre-populated with 1,2,3 and the streaming append
+      // adds another 1,2,3 from the join, so the table ends with two copies of each value.
+      val expectedRows = if (isSelfScan) {
+        Seq(Row(1), Row(2), Row(3), Row(1), Row(2), Row(3))
+      } else {
+        Seq(Row(1), Row(2), Row(3))
+      }
+      checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), expectedRows)
+    }
+  }
+
+  test("micro-batch fails when target table schema changes between batches") {
+    createSimpleTable("value INT")
+
+    withTempDir { checkpointDir =>
+      val inputData = MemoryStream[Int]
+      val query = inputData.toDF()
+        .writeStream
+        .option("checkpointLocation", checkpointDir.getAbsolutePath)
+        .toTable(tableNameAsString)
+
+      // Batch 1 succeeds against the original schema captured at query start.
+      inputData.addData(1, 2, 3)
+      query.processAllAvailable()
+
+      val firstTxn = streamCatalog(query).lastTransaction
+      assert(firstTxn != null)
+      assert(firstTxn.currentState === Committed)
+
+      // Mutate the target schema between micro-batches via the main session catalog. The
+      // shared in-memory backing store makes the change visible to the streaming session.
+      sql(s"ALTER TABLE $tableNameAsString ADD COLUMNS (extra STRING)")
+
+      // Batch 2: re-resolution of the WriteTargetContext reference loads the altered table
+      // and validateNoChanges rejects the added column.
+      inputData.addData(4, 5, 6)
+      val ex = intercept[Exception] { query.processAllAvailable() }
+      assert(ex.getMessage.contains("INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS") ||
+        Option(ex.getCause).exists(
+          _.getMessage.contains("INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS")))
+      query.stop()
+
+      // Only batch 1's rows should be visible; batch 2 never wrote anything.
+      checkAnswer(
+        sql(s"SELECT value FROM $tableNameAsString"),
+        Seq(Row(1), Row(2), Row(3)))
+    }
+  }
+
+  test("micro-batch fails when batch source schema changes after capture") {
+    createSimpleTable("value INT")
+
+    val sourceIdent = Identifier.of(namespace, "source_table")
+    val srcColumns = CatalogV2Util.structTypeToV2Columns(StructType.fromDDL("value INT"))
+    catalog.createTable(sourceIdent, new TableInfo.Builder().withColumns(srcColumns).build())
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1), (2), (3)")
+
+    withTempDir { checkpointDir =>
+      val inputData = MemoryStream[Int]
+
+      // Capture the static source against its original schema.
+      val staticData = spark.read.table(sourceNameAsString)
+
+      // Mutate the source schema after the static reference was captured.
+      sql(s"ALTER TABLE $sourceNameAsString ADD COLUMNS (extra STRING)")
+
+      val query = inputData.toDF()
+        .join(staticData, "value")
+        .writeStream
+        .option("checkpointLocation", checkpointDir.getAbsolutePath)
+        .toTable(tableNameAsString)
+
+      inputData.addData(1, 2, 3)
+      val ex = intercept[Exception] { query.processAllAvailable() }
+      assert(ex.getMessage.contains("INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS") ||
+        Option(ex.getCause).exists(
+          _.getMessage.contains("INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS")))
+      query.stop()
+
+      // Analysis failed before any commit. The target must remain empty.
+      checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Seq.empty)
+    }
+  }
+
+  test("transaction is aborted when micro-batch write fails and no data is written") {
+    val columns = CatalogV2Util.structTypeToV2Columns(StructType.fromDDL("value INT"))
+    val tableInfo = new TableInfo.Builder()
+      .withColumns(columns)
+      .withProperties(Collections.singletonMap(
+        InMemoryBaseTable.SIMULATE_FAILED_WRITE_OPTION, "true"))
+      .build()
+    catalog.createTable(ident, tableInfo)
+
+    withTempDir { checkpointDir =>
+      val inputData = MemoryStream[Int]
+      val query = inputData.toDF()
+        .writeStream
+        .option("checkpointLocation", checkpointDir.getAbsolutePath)
+        .toTable(tableNameAsString)
+
+      inputData.addData(1, 2, 3)
+      intercept[Exception] { query.processAllAvailable() }
+      query.stop()
+
+      val txn = streamCatalog(query).lastTransaction
+      assert(txn != null, "expected a transaction to have been recorded")
+      assert(txn.currentState === Aborted)
+      assert(txn.isClosed)
+      // Aborted transaction must not advance the delegate version.
+      assert(table.version() === "0")
+
+      // Transaction must be scoped to the streaming session; main session catalog is untouched.
+      assert(catalog.observedTransactions.isEmpty)
+
+      // Writes must not be visible after an aborted transaction.
+      checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Seq.empty)
+    }
+  }
+
+  test("streaming write to non-transactional catalog does not start a transaction") {
+    withSQLConf("spark.sql.catalog.nonTxnCat" -> classOf[InMemoryTableCatalog].getName) {
+      val nonTxnCat = spark
+        .sessionState
+        .catalogManager
+        .catalog("nonTxnCat")
+        .asInstanceOf[InMemoryTableCatalog]
+      val columns = CatalogV2Util.structTypeToV2Columns(StructType.fromDDL("value INT"))
+      nonTxnCat.createTable(
+        Identifier.of(Array("ns"), "tbl"),
+        new TableInfo.Builder().withColumns(columns).build())
+
+      withTempDir { checkpointDir =>
+        val inputData = MemoryStream[Int]
+        val query = inputData.toDF()
+          .writeStream
+          .option("checkpointLocation", checkpointDir.getAbsolutePath)
+          .toTable("nonTxnCat.ns.tbl")
+
+        inputData.addData(1, 2, 3)
+        query.processAllAvailable()
+        query.stop()
+
+        assert(catalog.observedTransactions.isEmpty,
+          "no transaction expected for non-transactional catalog")
+        checkAnswer(spark.table("nonTxnCat.ns.tbl"), Seq(Row(1), Row(2), Row(3)))
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala
index d3a6b61a61b9f..6e9afe7abc97e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala
@@ -18,8 +18,9 @@
 package org.apache.spark.sql.connector
 
 import org.apache.spark.SparkRuntimeException
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.connector.catalog.{Column, ColumnDefaultValue, InMemoryTable, TableChange, TableInfo}
+import org.apache.spark.internal.config
+import org.apache.spark.sql.{sources, AnalysisException, Row}
+import org.apache.spark.sql.connector.catalog.{Aborted, Column, ColumnDefaultValue, Committed, InMemoryTable, TableChange, TableInfo}
 import org.apache.spark.sql.connector.expressions.{GeneralScalarExpression, LiteralValue}
 import org.apache.spark.sql.connector.write.UpdateSummary
 import org.apache.spark.sql.internal.SQLConf
@@ -222,6 +223,38 @@ abstract class UpdateTableSuiteBase extends RowLevelOperationSuiteBase {
     checkUpdateMetrics(numUpdatedRows = 0, numCopiedRows = 0)
   }
 
+  test("update with literal false condition") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "hardware" }
+        |{ "pk": 3, "salary": null, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"UPDATE $tableNameAsString SET salary = -1 WHERE false")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Row(1, 100, "hr") :: Row(2, 200, "hardware") :: Row(3, null, "hr") :: Nil)
+
+    checkUpdateMetrics(numUpdatedRows = 0, numCopiedRows = 0)
+  }
+
+  test("update with literal true condition") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "hardware" }
+        |{ "pk": 3, "salary": null, "dep": "hr" }
+        |""".stripMargin)
+
+    sql(s"UPDATE $tableNameAsString SET salary = -1 WHERE true")
+
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Row(1, -1, "hr") :: Row(2, -1, "hardware") :: Row(3, -1, "hr") :: Nil)
+
+    checkUpdateMetrics(numUpdatedRows = 3, numCopiedRows = 0)
+  }
+
   test("update without condition") {
     createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
       """{ "pk": 1, "salary": 100, "dep": "hr" }
@@ -308,6 +341,47 @@ abstract class UpdateTableSuiteBase extends RowLevelOperationSuiteBase {
     checkUpdateMetrics(numUpdatedRows = 2, numCopiedRows = 1)
   }
 
+  test("metric values are stable across stage retries") {
+    // Force a shuffle in the UPDATE plan via an IN-subquery (with broadcast disabled), then
+    // have the DAGScheduler corrupt the first attempt of every upstream shuffle map stage.
+    // Note: the current fetch-failure injection does not retry the writer stage, so this
+    // test passes equally well with plain SQLMetric — it only exercises the SLAM-aware
+    // read path. Follow-up #55738 will add infra to actually retry the writer stage and
+    // exercise the SLAM behavior end-to-end for UPDATE.
+    withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      withTempView("source") {
+        createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+          """{ "pk": 1, "salary": 100, "dep": "hr" }
+            |{ "pk": 2, "salary": 200, "dep": "software" }
+            |{ "pk": 3, "salary": 300, "dep": "hr" }
+            |{ "pk": 4, "salary": 400, "dep": "software" }
+            |""".stripMargin)
+
+        val sourceDF = Seq(1, 2).toDF("pk")
+        sourceDF.createOrReplaceTempView("source")
+
+        withSparkContextConf(
+            config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> "true") {
+          sql(
+            s"""UPDATE $tableNameAsString
+               |SET salary = salary + 100
+               |WHERE pk IN (SELECT pk FROM source)
+               |""".stripMargin)
+        }
+
+        checkUpdateMetrics(numUpdatedRows = 2, numCopiedRows = 2)
+
+        checkAnswer(
+          sql(s"SELECT * FROM $tableNameAsString"),
+          Seq(
+            Row(1, 200, "hr"),
+            Row(2, 300, "software"),
+            Row(3, 300, "hr"),
+            Row(4, 400, "software")))
+      }
+    }
+  }
+
   test("update nested struct fields") {
     createAndInitTable(
       s"""pk INT NOT NULL,
@@ -835,4 +909,325 @@ abstract class UpdateTableSuiteBase extends RowLevelOperationSuiteBase {
           Row(5)))
     }
   }
+
+  test("update with analysis failure and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    val exception = intercept[AnalysisException] {
+      sql(s"UPDATE $tableNameAsString SET invalid_column = -1")
+    }
+
+    assert(exception.getMessage.contains("invalid_column"))
+    assert(catalog.lastTransaction.currentState == Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("update with CTE and transactional checks") {
+    // create table
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // create source table
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+    // update using CTE
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""WITH cte AS (
+           |  SELECT pk, salary + 50 AS adjusted_salary, dep
+           |  FROM $sourceNameAsString
+           |  WHERE salary > 100
+           |)
+           |UPDATE $tableNameAsString t
+           |SET salary = -1
+           |WHERE t.dep = 'hr' AND EXISTS (SELECT 1 FROM cte WHERE cte.pk = t.pk)
+           |""".stripMargin)
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 2)
+    assert(table.version() == "2")
+
+    // check target table was scanned correctly
+    val targetTxnTable = txnTables(tableNameAsString)
+    val expectedNumTargetScans = if (deltaUpdate) 2 else 3
+    assert(targetTxnTable.scanEvents.size == expectedNumTargetScans)
+
+    // check target table scans for UPDATE condition (dep = 'hr')
+    val numUpdateTargetScans = targetTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    }
+    assert(numUpdateTargetScans == expectedNumTargetScans)
+
+    // check source table was scanned correctly
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    val expectedNumSourceScans = if (deltaUpdate) 2 else 4
+    assert(sourceTxnTable.scanEvents.size == expectedNumSourceScans)
+
+    // check source table scans in CTE (salary > 100)
+    val numCteSourceScans = sourceTxnTable.scanEvents.flatten.count {
+      case sources.GreaterThan("salary", 100) => true
+      case _ => false
+    }
+    assert(numCteSourceScans == expectedNumSourceScans)
+
+    // check txn state was propagated correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, -1, "hr"), // updated
+        Row(2, 200, "software"), // unchanged
+        Row(3, 300, "hr"))) // unchanged (no matching pk in source)
+  }
+
+  test("update with subquery on source table and transactional checks") {
+    // create target table
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // create source table
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+    // update using an uncorrelated IN subquery that reads from a transactional catalog table
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""UPDATE $tableNameAsString
+           |SET salary = -1
+           |WHERE pk IN (SELECT pk FROM $sourceNameAsString WHERE dep = 'hr')
+           |""".stripMargin)
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 2)
+    assert(table.version() == "2")
+
+    // check source table was scanned correctly (dep = 'hr' filter in the subquery)
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    val expectedNumSourceScans = if (deltaUpdate) 2 else 4
+    assert(sourceTxnTable.scanEvents.size == expectedNumSourceScans)
+
+    val numSubquerySourceScans = sourceTxnTable.scanEvents.flatten.count {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    }
+    assert(numSubquerySourceScans == expectedNumSourceScans)
+
+    // check target table was scanned correctly
+    val targetTxnTable = txnTables(tableNameAsString)
+    val expectedNumTargetScans = if (deltaUpdate) 2 else 3
+    assert(targetTxnTable.scanEvents.size == expectedNumTargetScans)
+
+    // check txn state was propagated correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, -1, "hr"), // updated (pk 1 is in subquery result)
+        Row(2, 200, "software"), // unchanged
+        Row(3, 300, "hr"))) // unchanged (pk 3 not in subquery result)
+  }
+
+  test("update with uncorrelated scalar subquery on source table and transactional checks") {
+    // create target table
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    // create source table
+    sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT, dep STRING)")
+    sql(s"INSERT INTO $sourceNameAsString VALUES (1, 150, 'hr'), (4, 400, 'finance')")
+
+    // update using an uncorrelated scalar subquery in the SET clause that reads from a
+    // transactional catalog table; scalar subqueries are executed as SubqueryExec at runtime
+    // and cannot be rewritten as joins
+    val (txn, txnTables) = executeTransaction {
+      sql(
+        s"""UPDATE $tableNameAsString
+           |SET salary = (SELECT max(salary) FROM $sourceNameAsString WHERE dep = 'hr')
+           |WHERE dep = 'hr'
+           |""".stripMargin)
+    }
+
+    // check txn was properly committed and closed
+    assert(txn.currentState == Committed)
+    assert(txn.isClosed)
+    assert(txnTables.size == 2)
+    assert(table.version() == "2")
+
+    // check source table was scanned via the transaction catalog
+    val sourceTxnTable = txnTables(sourceNameAsString)
+    assert(sourceTxnTable.scanEvents.nonEmpty)
+    assert(sourceTxnTable.scanEvents.flatten.exists {
+      case sources.EqualTo("dep", "hr") => true
+      case _ => false
+    })
+
+    // check target table was scanned via the transaction catalog
+    val targetTxnTable = txnTables(tableNameAsString)
+    assert(targetTxnTable.scanEvents.nonEmpty)
+
+    // check txn state was propagated correctly
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 150, "hr"), // updated (max salary in source for 'hr' is 150)
+        Row(2, 200, "software"), // unchanged
+        Row(3, 150, "hr"))) // updated
+  }
+
+  test("update with constraint violation and transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |{ "pk": 3, "salary": 300, "dep": "hr" }
+        |""".stripMargin)
+
+    val exception = intercept[SparkRuntimeException] {
+      executeTransaction {
+        sql(
+          s"""UPDATE $tableNameAsString
+             |SET pk = NULL
+             |WHERE dep = 'hr'
+             |""".stripMargin) // NULL violates NOT NULL constraint
+      }
+    }
+
+    assert(exception.getMessage.contains("NOT_NULL_ASSERT_VIOLATION"))
+    assert(catalog.lastTransaction.currentState == Aborted)
+    assert(catalog.lastTransaction.isClosed)
+  }
+
+  test("update using view with transactional checks") {
+    withView("temp_view") {
+      // create target table
+      createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+        """{ "pk": 1, "salary": 100, "dep": "hr" }
+          |{ "pk": 2, "salary": 200, "dep": "software" }
+          |{ "pk": 3, "salary": 300, "dep": "hr" }
+          |""".stripMargin)
+
+      // create source table
+      sql(s"CREATE TABLE $sourceNameAsString (pk INT NOT NULL, salary INT)")
+      sql(s"INSERT INTO $sourceNameAsString (pk, salary) VALUES (1, 150), (4, 400)")
+
+      // create view on top of source and target tables
+      sql(
+        s"""CREATE VIEW temp_view AS
+           |SELECT s.pk, s.salary, t.dep
+           |FROM $sourceNameAsString s
+           |LEFT JOIN (
+           | SELECT * FROM $tableNameAsString WHERE pk < 10
+           |) t ON s.pk = t.pk
+           |""".stripMargin)
+
+      // update target table using view
+      val (txn, txnTables) = executeTransaction {
+        sql(
+          s"""UPDATE $tableNameAsString t
+             |SET salary = -1
+             |WHERE t.dep = 'hr' AND EXISTS (SELECT 1 FROM temp_view v WHERE v.pk = t.pk)
+             |""".stripMargin)
+      }
+
+      // check txn covers both tables and was properly committed and closed
+      assert(txn.currentState == Committed)
+      assert(txn.isClosed)
+      assert(txnTables.size == 2)
+      assert(table.version() == "2")
+
+      // check target table was scanned correctly
+      val targetTxnTable = txnTables(tableNameAsString)
+      val expectedNumTargetScans = if (deltaUpdate) 4 else 7
+      assert(targetTxnTable.scanEvents.size == expectedNumTargetScans)
+
+      // check target table scans as UPDATE target (dep = 'hr')
+      val numUpdateTargetScans = targetTxnTable.scanEvents.flatten.count {
+        case sources.EqualTo("dep", "hr") => true
+        case _ => false
+      }
+      val expectedNumUpdateTargetScans = if (deltaUpdate) 2 else 3
+      assert(numUpdateTargetScans == expectedNumUpdateTargetScans)
+
+      // check target table scans in view as source (pk < 10)
+      val numViewTargetScans = targetTxnTable.scanEvents.flatten.count {
+        case sources.LessThan("pk", 10L) => true
+        case _ => false
+      }
+      val expectedNumViewTargetScans = if (deltaUpdate) 2 else 4
+      assert(numViewTargetScans == expectedNumViewTargetScans)
+
+      // check source table scans in view
+      val sourceTxnTable = txnTables(sourceNameAsString)
+      val expectedNumSourceScans = if (deltaUpdate) 2 else 4
+      assert(sourceTxnTable.scanEvents.size == expectedNumSourceScans)
+
+      // check txn state was propagated correctly
+      checkAnswer(
+        sql(s"SELECT * FROM $tableNameAsString"),
+        Seq(
+          Row(1, -1, "hr"), // updated from view
+          Row(2, 200, "software"), // unchanged
+          Row(3, 300, "hr"))) // unchanged (no matching pk in source)
+    }
+  }
+
+  test("df.explain() on update with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    // sql() is lazy, but explain() forces executedPlan.
+    sql(s"UPDATE $tableNameAsString SET salary = -1 WHERE dep = 'hr'").explain()
+
+    assert(catalog.lastTransaction != null)
+    assert(catalog.lastTransaction.currentState == Committed)
+    assert(catalog.lastTransaction.isClosed)
+    assert(table.version() == "2")
+
+    // The UPDATE was actually executed, not just planned.
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, -1, "hr"), // updated
+        Row(2, 200, "software"))) // unchanged
+  }
+
+  test("EXPLAIN UPDATE SQL with transactional checks") {
+    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
+      """{ "pk": 1, "salary": 100, "dep": "hr" }
+        |{ "pk": 2, "salary": 200, "dep": "software" }
+        |""".stripMargin)
+
+    // EXPLAIN UPDATE only plans the command, it does not execute the write.
+    sql(s"EXPLAIN UPDATE $tableNameAsString SET salary = -1 WHERE dep = 'hr'")
+
+    // A transaction should not have started at all.
+    assert(catalog.transaction === null)
+
+    // The UPDATE was not executed. Data is unchanged.
+    checkAnswer(
+      sql(s"SELECT * FROM $tableNameAsString"),
+      Seq(
+        Row(1, 100, "hr"),
+        Row(2, 200, "software")))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala
index 721b86593bacb..3e48c5222e6f2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala
@@ -253,6 +253,9 @@ class V1WriteFallbackSessionCatalogSuite
   extends InsertIntoTests(supportsDynamicOverwrite = false, includeSQLOnlyTests = true)
   with SessionCatalogTest[InMemoryTableWithV1Fallback, V1FallbackTableCatalog] {
 
+  // V1 fallback writes do not flow through V2TableWriteExec, so no InsertSummary is emitted.
+  override protected def checkInsertMetrics(tableName: String, numInsertedRows: Long): Unit = ()
+
   override protected val v2Format = classOf[InMemoryV1Provider].getName
   override protected val catalogClassName: String = classOf[V1FallbackTableCatalog].getName
   override protected val catalogAndNamespace: String = ""
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 2c10497c190e8..c626d7183513e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -876,39 +876,6 @@ class QueryCompilationErrorsSuite
         "inputTypes" -> "[\"INT\", \"STRING\", \"STRING\"]"))
   }
 
-  test("SPARK-49666: the trim collation feature is off without collate builder call") {
-    withSQLConf(SQLConf.TRIM_COLLATION_ENABLED.key -> "false") {
-      Seq(
-        "CREATE TABLE t(col STRING COLLATE EN_RTRIM_CI) USING parquet",
-        "CREATE TABLE t(col STRING COLLATE UTF8_LCASE_RTRIM) USING parquet",
-        "SELECT 'aaa' COLLATE UNICODE_LTRIM_CI"
-      ).foreach { sqlText =>
-        checkError(
-          exception = intercept[AnalysisException](sql(sqlText)),
-          condition = "UNSUPPORTED_FEATURE.TRIM_COLLATION"
-        )
-      }
-    }
-  }
-
-  test("SPARK-49666: the trim collation feature is off with collate builder call") {
-    withSQLConf(SQLConf.TRIM_COLLATION_ENABLED.key -> "false") {
-      Seq(
-        "SELECT collate('aaa', 'UNICODE_RTRIM')",
-        "SELECT collate('aaa', 'UTF8_BINARY_RTRIM')",
-        "SELECT collate('aaa', 'EN_AI_RTRIM')"
-      ).foreach { sqlText =>
-        checkError(
-          exception = intercept[AnalysisException](sql(sqlText)),
-          condition = "UNSUPPORTED_FEATURE.TRIM_COLLATION",
-          parameters = Map.empty,
-          context =
-            ExpectedContext(fragment = sqlText.substring(7), start = 7, stop = sqlText.length - 1)
-        )
-      }
-    }
-  }
-
   test("SPARK-50779: the object level collations feature is unsupported when flag is disabled") {
     withSQLConf(SQLConf.OBJECT_LEVEL_COLLATIONS_ENABLED.key -> "false") {
       Seq(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/MetricViewV2CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/MetricViewV2CatalogSuite.scala
new file mode 100644
index 0000000000000..fedc2475f90ed
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/MetricViewV2CatalogSuite.scala
@@ -0,0 +1,1078 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import java.util.concurrent.ConcurrentHashMap
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.{AnalysisException, QueryTest}
+import org.apache.spark.sql.catalyst.analysis.{NoSuchViewException, ViewAlreadyExistsException}
+import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTableCatalog, MetadataTable, Table, TableCatalog, TableDependency, TableSummary, TableViewCatalog, ViewInfo}
+import org.apache.spark.sql.metricview.serde.{AssetSource, Column, Constants, DimensionExpression, MeasureExpression, MetricView, MetricViewFactory, SQLSource}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.Metadata
+
+/**
+ * Tests that exercise [[org.apache.spark.sql.metricview.logical.CreateMetricView]] on a
+ * non-session V2 catalog (routed through
+ * [[org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy]] to
+ * [[org.apache.spark.sql.execution.datasources.v2.CreateV2MetricViewExec]]).
+ * Metric views are persisted through the same [[ViewCatalog]] interface
+ * as plain views; the only marker that distinguishes them is `PROP_TABLE_TYPE = METRIC_VIEW`
+ * plus the typed `viewDependencies` field on [[ViewInfo]]. The recording catalog used here is a
+ * minimal [[TableViewCatalog]] so the same instance can also host the source table referenced by
+ * the metric view's YAML.
+ */
+class MetricViewV2CatalogSuite extends QueryTest with SharedSparkSession {
+
+  import testImplicits._
+
+  private val testCatalogName = "testcat"
+  private val testNamespace = "ns"
+  private val sourceTableName = "events"
+  private val fullSourceTableName =
+    s"$testCatalogName.$testNamespace.$sourceTableName"
+  private val metricViewName = "mv"
+  private val fullMetricViewName =
+    s"$testCatalogName.$testNamespace.$metricViewName"
+
+  private val metricViewColumns = Seq(
+    Column("region", DimensionExpression("region"), 0),
+    Column("count_sum", MeasureExpression("sum(count)"), 1))
+
+  private val testTableData = Seq(
+    ("region_1", 1, 5.0),
+    ("region_2", 2, 10.0))
+
+  override protected def beforeAll(): Unit = {
+    super.beforeAll()
+    spark.conf.set(
+      s"spark.sql.catalog.$testCatalogName",
+      classOf[MetricViewRecordingCatalog].getName)
+    // A catalog that does not implement ViewCatalog - used for the negative gate test.
+    spark.conf.set(
+      s"spark.sql.catalog.${MetricViewV2CatalogSuite.noViewCatalogName}",
+      classOf[InMemoryTableCatalog].getName)
+  }
+
+  override protected def afterAll(): Unit = {
+    spark.conf.unset(s"spark.sql.catalog.$testCatalogName")
+    spark.conf.unset(
+      s"spark.sql.catalog.${MetricViewV2CatalogSuite.noViewCatalogName}")
+    super.afterAll()
+  }
+
+  private def withTestCatalogTables(body: => Unit): Unit = {
+    MetricViewRecordingCatalog.reset()
+    testTableData.toDF("region", "count", "price")
+      .createOrReplaceTempView("metric_view_v2_source")
+    try {
+      sql(
+        s"""CREATE TABLE $fullSourceTableName
+           |USING foo AS SELECT * FROM metric_view_v2_source""".stripMargin)
+      body
+    } finally {
+      // The metric-view ident `mv` may have ended up as either a view (most tests) or as a
+      // pre-created table (a few negative tests pre-create a table at the same ident to
+      // exercise cross-type collisions). Sweep both kinds so subsequent tests in the suite
+      // start from a clean catalog state. Wrap each DROP in a Try because:
+      //   - DROP VIEW IF EXISTS on a leftover *table* throws WRONG_COMMAND_FOR_OBJECT_TYPE
+      //     under master's new DropViewExec active-rejection contract.
+      //   - DROP TABLE IF EXISTS on a leftover *view* throws the symmetric error.
+      //   - On a totally clean state both are silent no-ops.
+      scala.util.Try(sql(s"DROP VIEW IF EXISTS $fullMetricViewName"))
+      scala.util.Try(sql(s"DROP TABLE IF EXISTS $fullMetricViewName"))
+      scala.util.Try(sql(s"DROP TABLE IF EXISTS $fullSourceTableName"))
+      spark.catalog.dropTempView("metric_view_v2_source")
+      MetricViewRecordingCatalog.reset()
+    }
+  }
+
+  private def createMetricView(
+      name: String,
+      metricView: MetricView,
+      comment: Option[String] = None): String = {
+    val yaml = MetricViewFactory.toYAML(metricView)
+    val commentClause = comment.map(c => s"\nCOMMENT '$c'").getOrElse("")
+    sql(
+      s"""CREATE VIEW $name
+         |WITH METRICS$commentClause
+         |LANGUAGE YAML
+         |AS
+         |$$$$
+         |$yaml
+         |$$$$""".stripMargin)
+    yaml
+  }
+
+  private def capturedViewInfo(): ViewInfo = {
+    val ident = Identifier.of(Array(testNamespace), metricViewName)
+    val info = MetricViewRecordingCatalog.capturedViews.get(ident)
+    assert(info != null,
+      s"Expected ViewInfo for $ident to be captured by the V2 catalog")
+    info
+  }
+
+  // ============================================================
+  // Section 1: CREATE-related tests
+  // ============================================================
+
+
+  test("V2 catalog receives METRIC_VIEW table type and view text via ViewInfo") {
+    withTestCatalogTables {
+      val metricView = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      val yaml = createMetricView(fullMetricViewName, metricView)
+
+      val info = capturedViewInfo()
+      // PROP_TABLE_TYPE is overwritten to METRIC_VIEW after `ViewInfo`'s constructor stamps it
+      // to VIEW; this is the marker `V1Table.toCatalogTable` reads to map the round-tripped row
+      // back to `CatalogTableType.METRIC_VIEW`.
+      assert(info.properties().get(TableCatalog.PROP_TABLE_TYPE)
+        === TableSummary.METRIC_VIEW_TABLE_TYPE)
+      // The captured queryText is the raw text between `$$ ... $$` -- including the leading
+      // and trailing newline our SQL fixture inserts -- so trim before comparing to the
+      // pre-substitution YAML body.
+      assert(info.queryText().trim === yaml.trim)
+
+      val deps = info.viewDependencies()
+      assert(deps != null)
+      assert(deps.dependencies().length === 1)
+      val tableDep = deps.dependencies()(0).asInstanceOf[TableDependency]
+      assert(tableDep.nameParts().toSeq ===
+        Seq(testCatalogName, testNamespace, sourceTableName))
+    }
+  }
+
+  test("V2 catalog path populates metric_view.* + view context + sql configs on ViewInfo") {
+    withTestCatalogTables {
+      val metricView = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = Some("count > 0"),
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, metricView)
+
+      val info = capturedViewInfo()
+      val props = info.properties()
+
+      // metric_view.* descriptive properties (mirrors the canonical metric-view property
+      // layout).
+      assert(props.get(MetricView.PROP_FROM_TYPE) === "ASSET")
+      assert(props.get(MetricView.PROP_FROM_NAME) === fullSourceTableName)
+      assert(props.get(MetricView.PROP_FROM_SQL) === null)
+      assert(props.get(MetricView.PROP_WHERE) === "count > 0")
+
+      // SQL configs and current catalog/namespace are first-class typed fields on ViewInfo, no
+      // longer encoded into properties for V2 catalogs.
+      assert(info.sqlConfigs().size > 0,
+        s"Expected at least one captured SQL config; got ${info.sqlConfigs()}")
+      assert(info.currentCatalog() ===
+        spark.sessionState.catalogManager.currentCatalog.name())
+      assert(info.currentNamespace().toSeq ===
+        spark.sessionState.catalogManager.currentNamespace.toSeq)
+    }
+  }
+
+  test("V2 catalog path captures SQL source and comment") {
+    withTestCatalogTables {
+      val metricView = MetricView(
+        "0.1",
+        SQLSource(s"SELECT * FROM $fullSourceTableName"),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, metricView, comment = Some("my mv"))
+
+      val info = capturedViewInfo()
+      val props = info.properties()
+      assert(props.get(TableCatalog.PROP_TABLE_TYPE)
+        === TableSummary.METRIC_VIEW_TABLE_TYPE)
+      assert(props.get(MetricView.PROP_FROM_TYPE) === "SQL")
+      assert(props.get(MetricView.PROP_FROM_NAME) === null)
+      assert(props.get(MetricView.PROP_FROM_SQL) ===
+        s"SELECT * FROM $fullSourceTableName")
+      assert(props.get(TableCatalog.PROP_COMMENT) === "my mv")
+
+      val deps = info.viewDependencies()
+      assert(deps != null && deps.dependencies().length === 1)
+      val tableDep = deps.dependencies()(0).asInstanceOf[TableDependency]
+      assert(tableDep.nameParts().toSeq ===
+        Seq(testCatalogName, testNamespace, sourceTableName))
+    }
+  }
+
+  test("metric view columns carry metric_view.type / metric_view.expr in column metadata") {
+    withTestCatalogTables {
+      val metricView = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, metricView)
+
+      val cols = capturedViewInfo().columns()
+      assert(cols.length === metricViewColumns.length)
+
+      val byName = cols.map(c => c.name() -> c).toMap
+      def metadataOf(name: String): Metadata =
+        Metadata.fromJson(Option(byName(name).metadataInJSON()).getOrElse("{}"))
+
+      val regionMeta = metadataOf("region")
+      assert(regionMeta.getString(Constants.COLUMN_TYPE_PROPERTY_KEY) === "dimension")
+      assert(regionMeta.getString(Constants.COLUMN_EXPR_PROPERTY_KEY) === "region")
+
+      val countMeta = metadataOf("count_sum")
+      assert(countMeta.getString(Constants.COLUMN_TYPE_PROPERTY_KEY) === "measure")
+      assert(countMeta.getString(Constants.COLUMN_EXPR_PROPERTY_KEY) === "sum(count)")
+    }
+  }
+
+  test("user-specified column names with comments preserve metric_view.* metadata") {
+    withTestCatalogTables {
+      val metricView = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      val yaml = MetricViewFactory.toYAML(metricView)
+      // Pins aliasPlan(retainMetadata = true): metric_view.* keys must survive a column
+      // rename with comments.
+      sql(
+        s"""CREATE VIEW $fullMetricViewName (reg COMMENT 'region alias', n COMMENT 'count')
+           |WITH METRICS
+           |LANGUAGE YAML
+           |AS
+           |$$$$
+           |$yaml
+           |$$$$""".stripMargin)
+
+      val cols = capturedViewInfo().columns()
+      val byName = cols.map(c => c.name() -> c).toMap
+      assert(byName.keySet === Set("reg", "n"))
+
+      def metadataOf(name: String): Metadata =
+        Metadata.fromJson(Option(byName(name).metadataInJSON()).getOrElse("{}"))
+
+      val regMeta = metadataOf("reg")
+      assert(regMeta.getString(Constants.COLUMN_TYPE_PROPERTY_KEY) === "dimension")
+      assert(regMeta.getString(Constants.COLUMN_EXPR_PROPERTY_KEY) === "region")
+      // `CatalogV2Util.structTypeToV2Columns` peels "comment" off into `Column.comment()`
+      // rather than leaving it inside `metadataInJSON`; assert via the V2 column accessor.
+      assert(byName("reg").comment() === "region alias")
+
+      val nMeta = metadataOf("n")
+      assert(nMeta.getString(Constants.COLUMN_TYPE_PROPERTY_KEY) === "measure")
+      assert(nMeta.getString(Constants.COLUMN_EXPR_PROPERTY_KEY) === "sum(count)")
+      assert(byName("n").comment() === "count")
+    }
+  }
+
+  test("CREATE OR REPLACE VIEW ... WITH METRICS replaces an existing v2 metric view") {
+    withTestCatalogTables {
+      val first = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = Some("count > 0"),
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, first)
+
+      // Replace with a new body (different WHERE clause).
+      val replacement = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = Some("count > 100"),
+        select = metricViewColumns)
+      val replacementYaml = MetricViewFactory.toYAML(replacement)
+      sql(
+        s"""CREATE OR REPLACE VIEW $fullMetricViewName
+           |WITH METRICS
+           |LANGUAGE YAML
+           |AS
+           |$$$$
+           |$replacementYaml
+           |$$$$""".stripMargin)
+
+      val finalInfo = capturedViewInfo()
+      // Assert on the distinguishing fields of the replacement, not on diff vs. the original.
+      // queryText keeps the surrounding `\n` from the SQL `$$ ... $$` markers; trim first.
+      assert(finalInfo.queryText().trim === replacementYaml.trim)
+      assert(finalInfo.properties().get(MetricView.PROP_WHERE) === "count > 100")
+      val deps = finalInfo.viewDependencies()
+      assert(deps != null && deps.dependencies().length === 1)
+      val tableDep = deps.dependencies()(0).asInstanceOf[TableDependency]
+      assert(tableDep.nameParts().toSeq ===
+        Seq(testCatalogName, testNamespace, sourceTableName))
+    }
+  }
+
+  test("CREATE VIEW IF NOT EXISTS ... WITH METRICS is a no-op when the view exists") {
+    withTestCatalogTables {
+      val original = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, original)
+      val originalYaml = capturedViewInfo().queryText()
+
+      // Now CREATE VIEW IF NOT EXISTS with a different YAML body. The catalog should not see
+      // the second create at all (V2ViewPreparation's `viewExists` short-circuit fires before
+      // `buildViewInfo`), so the captured ViewInfo retains the original body.
+      val replacement = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = Some("count > 999"),
+        select = metricViewColumns)
+      val replacementYaml = MetricViewFactory.toYAML(replacement)
+      sql(
+        s"""CREATE VIEW IF NOT EXISTS $fullMetricViewName
+           |WITH METRICS
+           |LANGUAGE YAML
+           |AS
+           |$$$$
+           |$replacementYaml
+           |$$$$""".stripMargin)
+
+      assert(capturedViewInfo().queryText().trim === originalYaml.trim,
+        "IF NOT EXISTS over an existing metric view should be a no-op.")
+    }
+  }
+
+  test("CREATE VIEW ... WITH METRICS over a v2 table at the ident throws " +
+      "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") {
+    withTestCatalogTables {
+      // Pre-create a regular v2 table at the same ident the metric view will target. The
+      // catalog's `createView` call below should raise `ViewAlreadyExistsException`, which
+      // `CreateV2MetricViewExec` then decodes (via `tableExists`) into the precise cross-type
+      // collision error that `CreateV2ViewExec` emits.
+      sql(s"CREATE TABLE $fullMetricViewName (x INT) USING foo")
+
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      val yaml = MetricViewFactory.toYAML(mv)
+      val ex = intercept[AnalysisException] {
+        sql(
+          s"""CREATE VIEW $fullMetricViewName
+             |WITH METRICS
+             |LANGUAGE YAML
+             |AS
+             |$$$$
+             |$yaml
+             |$$$$""".stripMargin)
+      }
+      // SPARK-56655 added an analyzer-time pre-check for "ident already occupied by a table"
+      // before the v2 view-create exec runs, so the more specific
+      // `EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE` decoded by `CreateV2MetricViewExec.run`'s catch
+      // block is no longer reachable when a *plain* table sits at the ident -- the analyzer
+      // raises `TABLE_OR_VIEW_ALREADY_EXISTS` first. Both errors carry the same actionable
+      // signal ("can't create a view here because something else already lives at this ident").
+      assert(ex.getCondition === "TABLE_OR_VIEW_ALREADY_EXISTS",
+        s"Expected TABLE_OR_VIEW_ALREADY_EXISTS, got ${ex.getCondition}: ${ex.getMessage}")
+    }
+  }
+
+  test("CREATE VIEW IF NOT EXISTS ... WITH METRICS is a no-op when a v2 table sits at the " +
+      "ident") {
+    withTestCatalogTables {
+      sql(s"CREATE TABLE $fullMetricViewName (x INT) USING foo")
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      val yaml = MetricViewFactory.toYAML(mv)
+      // IF NOT EXISTS over a table is a no-op (v1 parity), not an error.
+      sql(
+        s"""CREATE VIEW IF NOT EXISTS $fullMetricViewName
+           |WITH METRICS
+           |LANGUAGE YAML
+           |AS
+           |$$$$
+           |$yaml
+           |$$$$""".stripMargin)
+      val ident = Identifier.of(Array(testNamespace), metricViewName)
+      assert(!MetricViewRecordingCatalog.capturedViews.containsKey(ident),
+        "IF NOT EXISTS over a v2 table should not register a view in the catalog.")
+    }
+  }
+
+  test("CREATE VIEW ... WITH METRICS on a non-ViewCatalog catalog fails with " +
+      "MISSING_CATALOG_ABILITY.VIEWS") {
+    val ex = intercept[AnalysisException] {
+      sql(
+        s"""CREATE VIEW ${MetricViewV2CatalogSuite.noViewCatalogName}.default.mv
+           |WITH METRICS
+           |LANGUAGE YAML
+           |AS
+           |$$$$
+           |${MetricViewFactory.toYAML(MetricView(
+              "0.1",
+              AssetSource(fullSourceTableName),
+              where = None,
+              select = metricViewColumns))}
+           |$$$$""".stripMargin)
+    }
+    // SPARK-56655 added the `.VIEWS` subclass; the bare `MISSING_CATALOG_ABILITY` no longer
+    // surfaces directly for the missing-view-ability case.
+    assert(ex.getCondition === "MISSING_CATALOG_ABILITY.VIEWS")
+    assert(ex.getMessage.contains("VIEWS"))
+  }
+
+  test("CREATE VIEW ... WITH METRICS at a multi-level-namespace v2 target succeeds") {
+    val deepNamespace = Array("ns_a", "ns_b")
+    val deepMetricViewName = "mv_deep"
+    val fullDeepName =
+      s"$testCatalogName.${deepNamespace.mkString(".")}.$deepMetricViewName"
+    withTestCatalogTables {
+      // Pre-create the multi-level namespace + a source table inside it. The metric view
+      // *target* lives in the same multi-level namespace -- that's what exercises the
+      // `MetricViewHelper.analyzeMetricViewText` lift to multi-part nameParts. The pre-lift
+      // code path failed at `ident.asTableIdentifier` with `requiresSinglePartNamespaceError`.
+      sql(s"CREATE NAMESPACE IF NOT EXISTS $testCatalogName.${deepNamespace.head}")
+      sql(s"CREATE NAMESPACE IF NOT EXISTS " +
+        s"$testCatalogName.${deepNamespace.mkString(".")}")
+      try {
+        val mv = MetricView(
+          "0.1",
+          AssetSource(fullSourceTableName),
+          where = None,
+          select = metricViewColumns)
+        val yaml = MetricViewFactory.toYAML(mv)
+        sql(
+          s"""CREATE VIEW $fullDeepName
+             |WITH METRICS
+             |LANGUAGE YAML
+             |AS
+             |$$$$
+             |$yaml
+             |$$$$""".stripMargin)
+
+        val deepIdent = Identifier.of(deepNamespace, deepMetricViewName)
+        val info = MetricViewRecordingCatalog.capturedViews.get(deepIdent)
+        assert(info != null, s"Expected ViewInfo for $deepIdent to be captured")
+        assert(info.properties().get(TableCatalog.PROP_TABLE_TYPE)
+          === TableSummary.METRIC_VIEW_TABLE_TYPE)
+      } finally {
+        scala.util.Try(sql(s"DROP VIEW IF EXISTS $fullDeepName"))
+        sql(s"DROP NAMESPACE IF EXISTS " +
+          s"$testCatalogName.${deepNamespace.mkString(".")} CASCADE")
+        sql(s"DROP NAMESPACE IF EXISTS $testCatalogName.${deepNamespace.head} CASCADE")
+      }
+    }
+  }
+
+  // ============================================================
+  // Section 2: Dependency extraction
+  // ============================================================
+
+
+  test("dependency extraction: SQL source JOIN captures both tables") {
+    withTestCatalogTables {
+      val secondSource = s"$testCatalogName.$testNamespace.customers"
+      sql(
+        s"""CREATE TABLE $secondSource (id INT, name STRING)
+           |USING foo""".stripMargin)
+      try {
+        val joinSql =
+          s"SELECT c.name, t.count FROM $fullSourceTableName t " +
+            s"JOIN $secondSource c ON t.count = c.id"
+        val metricView = MetricView(
+          "0.1",
+          SQLSource(joinSql),
+          where = None,
+          select = Seq(
+            Column("name", DimensionExpression("name"), 0),
+            Column("count_sum", MeasureExpression("sum(count)"), 1)))
+        createMetricView(fullMetricViewName, metricView)
+
+        val deps = capturedViewInfo().viewDependencies()
+        assert(deps != null)
+        val depParts = deps.dependencies()
+          .map(_.asInstanceOf[TableDependency].nameParts().toSeq).toSet
+        assert(depParts === Set(
+          Seq(testCatalogName, testNamespace, sourceTableName),
+          Seq(testCatalogName, testNamespace, "customers")),
+          s"Expected dependencies on both source tables, got $depParts")
+      } finally {
+        sql(s"DROP TABLE IF EXISTS $secondSource")
+      }
+    }
+  }
+
+  test("dependency extraction: SQL source subquery deduplicates same-table references") {
+    withTestCatalogTables {
+      val subquerySql =
+        s"SELECT * FROM $fullSourceTableName " +
+          s"WHERE count > (SELECT avg(count) FROM $fullSourceTableName)"
+      val metricView = MetricView(
+        "0.1",
+        SQLSource(subquerySql),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, metricView)
+
+      val deps = capturedViewInfo().viewDependencies()
+      assert(deps != null && deps.dependencies().length === 1,
+        s"Expected 1 deduplicated dependency, got " +
+          s"${Option(deps).map(_.dependencies().length).getOrElse(0)}")
+      val tableDep = deps.dependencies()(0).asInstanceOf[TableDependency]
+      assert(tableDep.nameParts().toSeq ===
+        Seq(testCatalogName, testNamespace, sourceTableName))
+    }
+  }
+
+  test("dependency extraction: SQL source self-join deduplicates same-table references") {
+    withTestCatalogTables {
+      val selfJoinSql =
+        s"SELECT a.region AS a_region, a.count AS a_count " +
+          s"FROM $fullSourceTableName a JOIN $fullSourceTableName b " +
+          s"ON a.region = b.region"
+      val metricView = MetricView(
+        "0.1",
+        SQLSource(selfJoinSql),
+        where = None,
+        select = Seq(
+          Column("region", DimensionExpression("a_region"), 0),
+          Column("count_sum", MeasureExpression("sum(a_count)"), 1)))
+      createMetricView(fullMetricViewName, metricView)
+
+      val deps = capturedViewInfo().viewDependencies()
+      assert(deps != null && deps.dependencies().length === 1,
+        s"Expected 1 deduplicated dependency for self-join, got " +
+          s"${Option(deps).map(_.dependencies().length).getOrElse(0)}")
+      val tableDep = deps.dependencies()(0).asInstanceOf[TableDependency]
+      assert(tableDep.nameParts().toSeq ===
+        Seq(testCatalogName, testNamespace, sourceTableName))
+    }
+  }
+
+  test("dependency extraction: V1 session-catalog source emits 3-part nameParts") {
+    val v1Source = "metric_view_v2_v1source"
+    spark.range(0, 5).toDF("v")
+      .write.mode("overwrite").saveAsTable(v1Source)
+    try {
+      withTestCatalogTables {
+        val mv = MetricView(
+          "0.1",
+          // SQL source resolves through the current (session) catalog; the resolved
+          // `LogicalRelation` carries a session-catalog `CatalogTable`.
+          SQLSource(s"SELECT v AS region, v AS count FROM $v1Source"),
+          where = None,
+          select = metricViewColumns)
+        createMetricView(fullMetricViewName, mv)
+
+        val deps = capturedViewInfo().viewDependencies()
+        assert(deps != null && deps.dependencies().length === 1)
+        val parts =
+          deps.dependencies()(0).asInstanceOf[TableDependency].nameParts().toSeq
+        // `MetricViewHelper.qualifyV1` normalizes any `TableIdentifier.nameParts` shape
+        // (1, 2, or 3 parts depending on what the analyzer captured) to the stable
+        // `[spark_catalog, db, table]` shape so downstream consumers see deterministic
+        // arity per source kind.
+        assert(parts.length === 3,
+          s"V1 nameParts should normalize to exactly 3 parts, got ${parts.length}: $parts")
+        assert(parts.head === "spark_catalog",
+          s"V1 nameParts head should be the session-catalog name, got $parts")
+        assert(parts.last === v1Source, s"Last part should be the table name, got $parts")
+      }
+    } finally {
+      sql(s"DROP TABLE IF EXISTS $v1Source")
+    }
+  }
+
+  test("dependency extraction: multi-level V2 namespace source emits N+2 nameParts") {
+    val multiNamespace = Array("ns_a", "ns_b")
+    val multiTable = "events_deep"
+    val multiFull = s"$testCatalogName.${multiNamespace.mkString(".")}.$multiTable"
+    withTestCatalogTables {
+      // The InMemoryTableCatalog (TableViewCatalog mixin) supports multi-level namespaces.
+      sql(s"CREATE NAMESPACE IF NOT EXISTS $testCatalogName.${multiNamespace.head}")
+      sql(s"CREATE NAMESPACE IF NOT EXISTS " +
+        s"$testCatalogName.${multiNamespace.mkString(".")}")
+      sql(s"CREATE TABLE $multiFull (region STRING, count INT) USING foo")
+      try {
+        val mv = MetricView(
+          "0.1",
+          SQLSource(s"SELECT region, count FROM $multiFull"),
+          where = None,
+          select = metricViewColumns)
+        createMetricView(fullMetricViewName, mv)
+
+        val deps = capturedViewInfo().viewDependencies()
+        assert(deps != null && deps.dependencies().length === 1)
+        val parts =
+          deps.dependencies()(0).asInstanceOf[TableDependency].nameParts().toSeq
+        assert(parts === Seq(testCatalogName, multiNamespace(0), multiNamespace(1), multiTable),
+          s"Multi-level nameParts should preserve every namespace component, got $parts")
+      } finally {
+        sql(s"DROP TABLE IF EXISTS $multiFull")
+        sql(s"DROP NAMESPACE IF EXISTS " +
+          s"$testCatalogName.${multiNamespace.mkString(".")} CASCADE")
+        sql(s"DROP NAMESPACE IF EXISTS $testCatalogName.${multiNamespace.head} CASCADE")
+      }
+    }
+  }
+
+  // ============================================================
+  // Section 3: SELECT cases
+  // ============================================================
+
+
+  test("SELECT measure(...) from a v2 metric view returns aggregated rows") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      // The fixture's `events` source has rows ("region_1", 1, 5.0), ("region_2", 2, 10.0).
+      // The metric view aggregates by `region` summing `count`. Resolution flows through
+      // loadTableOrView -> MetadataTable(ViewInfo) -> V1Table.toCatalogTable(ViewInfo) ->
+      // CatalogTableType.METRIC_VIEW -> ResolveMetricView, which rewrites the view body
+      // into Aggregate(Seq(region), Seq(sum(count) AS count_sum)) over `events`. The
+      // `measure(...)` wrapper is required for measure columns -- selecting `count_sum`
+      // bare would fail (mirrors the v1 `MetricViewSuite` query syntax).
+      checkAnswer(
+        sql(s"SELECT region, measure(count_sum) FROM $fullMetricViewName " +
+          "GROUP BY region ORDER BY region"),
+        sql(s"SELECT region, sum(count) FROM $fullSourceTableName " +
+          "GROUP BY region ORDER BY region"))
+    }
+  }
+
+  test("SELECT measure(...) with a WHERE clause on a dimension") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      // Filter at the query layer (not on the metric view's own `where:`).
+      checkAnswer(
+        sql(s"SELECT measure(count_sum) FROM $fullMetricViewName " +
+          "WHERE region = 'region_2'"),
+        sql(s"SELECT sum(count) FROM $fullSourceTableName " +
+          "WHERE region = 'region_2'"))
+    }
+  }
+
+  test("SELECT against a v2 metric view honors the view's pre-defined where clause") {
+    withTestCatalogTables {
+      // Pre-define a filter on the metric view itself: only rows with count > 1 should be
+      // visible to consumers (i.e. region_2 only).
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = Some("count > 1"),
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      checkAnswer(
+        sql(s"SELECT region, measure(count_sum) FROM $fullMetricViewName " +
+          "GROUP BY region ORDER BY region"),
+        sql(s"SELECT region, sum(count) FROM $fullSourceTableName " +
+          "WHERE count > 1 GROUP BY region ORDER BY region"))
+    }
+  }
+
+  test("SELECT from a v2 metric view supports multiple measures with different aggregations") {
+    withTestCatalogTables {
+      // Add a second measure (sum of price) so we exercise the multi-measure rewrite path.
+      val cols = Seq(
+        Column("region", DimensionExpression("region"), 0),
+        Column("count_sum", MeasureExpression("sum(count)"), 1),
+        Column("price_sum", MeasureExpression("sum(price)"), 2),
+        Column("price_max", MeasureExpression("max(price)"), 3))
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = cols)
+      createMetricView(fullMetricViewName, mv)
+      checkAnswer(
+        sql(s"SELECT measure(count_sum), measure(price_sum), measure(price_max) " +
+          s"FROM $fullMetricViewName"),
+        sql(s"SELECT sum(count), sum(price), max(price) FROM $fullSourceTableName"))
+    }
+  }
+
+  test("SELECT from a v2 metric view supports ORDER BY and LIMIT on measures") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      checkAnswer(
+        sql(s"SELECT region, measure(count_sum) FROM $fullMetricViewName " +
+          "GROUP BY region ORDER BY 2 DESC LIMIT 1"),
+        sql(s"SELECT region, sum(count) FROM $fullSourceTableName " +
+          "GROUP BY region ORDER BY 2 DESC LIMIT 1"))
+    }
+  }
+
+  // ============================================================
+  // Section 4: DESCRIBE cases
+  // ============================================================
+
+
+  test("DESCRIBE TABLE EXTENDED on a v2 metric view round-trips through loadTableOrView") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      val yaml = createMetricView(fullMetricViewName, mv)
+
+      // DESCRIBE TABLE EXTENDED resolves the ident through `Analyzer.lookupTableOrView`,
+      // which calls `TableViewCatalog.loadTableOrView` once and gets back a
+      // `MetadataTable(ViewInfo)`. The analyzer wraps it as a `ResolvedPersistentView` and
+      // `DataSourceV2Strategy` routes through SPARK-56655's `DescribeV2ViewExec`, which
+      // reads the typed `ViewInfo` directly and emits the standard "Type" / "View Text" /
+      // "View Current Catalog" / "View Schema Mode" / etc. rows. Pins that `DescribeV2ViewExec`
+      // emits a "Type" row for parity with v1 `CatalogTable.toJsonLinkedHashMap`, so users
+      // can distinguish a plain VIEW from a sub-kind like METRIC_VIEW.
+      val rows = sql(s"DESCRIBE TABLE EXTENDED $fullMetricViewName").collect()
+      val rowMap = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+
+      assert(rowMap.contains("View Text"),
+        s"Expected 'View Text' row in DESCRIBE EXTENDED output, got keys: ${rowMap.keys}")
+      // `DescribeV2ViewExec` writes `viewInfo.queryText` directly, so trim handles the
+      // leading/trailing newline the SQL `$$ ... $$` fixture inserts vs. the bare yaml body.
+      assert(rowMap("View Text").trim === yaml.trim,
+        s"View Text should round-trip the YAML body, got: ${rowMap("View Text")}")
+      assert(rowMap.get("Type").contains(TableSummary.METRIC_VIEW_TABLE_TYPE),
+        s"Type row should reflect METRIC_VIEW, got: ${rowMap.get("Type")}")
+    }
+  }
+
+  test("DESCRIBE TABLE on a v2 metric view returns the aliased columns") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      val rows = sql(s"DESCRIBE TABLE $fullMetricViewName").collect()
+      val byName = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+      assert(byName.contains("region"), s"Missing 'region' col, got: ${byName.keys}")
+      assert(byName.contains("count_sum"), s"Missing 'count_sum' col, got: ${byName.keys}")
+    }
+  }
+
+  // ============================================================
+  // Section 5: DROP / SHOW cases
+  // ============================================================
+
+
+  test("DROP VIEW succeeds on a V2 metric view") {
+    withTestCatalogTables {
+      val metricView = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, metricView)
+      val ident = Identifier.of(Array(testNamespace), metricViewName)
+
+      assert(MetricViewRecordingCatalog.capturedViews.containsKey(ident))
+
+      sql(s"DROP VIEW $fullMetricViewName")
+      assert(!MetricViewRecordingCatalog.capturedViews.containsKey(ident))
+    }
+  }
+
+  test("DROP TABLE on a v2 metric view throws WRONG_COMMAND_FOR_OBJECT_TYPE") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+
+      // SPARK-56655's `DropTableExec` actively rejects with `WRONG_COMMAND_FOR_OBJECT_TYPE`
+      // ("Use DROP VIEW instead") when a view sits at the ident, replacing the prior
+      // `EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE` decoding. Same actionable signal for users.
+      val ex = intercept[AnalysisException] {
+        sql(s"DROP TABLE $fullMetricViewName")
+      }
+      assert(ex.getCondition === "WRONG_COMMAND_FOR_OBJECT_TYPE",
+        s"Expected WRONG_COMMAND_FOR_OBJECT_TYPE, got ${ex.getCondition}: ${ex.getMessage}")
+      assert(ex.getMessage.contains("DROP VIEW"),
+        s"Error message should mention 'DROP VIEW', got: ${ex.getMessage}")
+
+      // The metric view is still present after the failed DROP TABLE.
+      val ident = Identifier.of(Array(testNamespace), metricViewName)
+      assert(MetricViewRecordingCatalog.capturedViews.containsKey(ident),
+        "DROP TABLE on a metric view must not delete it.")
+    }
+  }
+
+  test("DROP TABLE IF EXISTS on a v2 metric view also throws WRONG_COMMAND_FOR_OBJECT_TYPE") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+
+      // IF EXISTS does not silence the wrong-type error: the entity exists, just not as a
+      // table. (Mirrors the v1 `DropTableCommand` behavior; `IF EXISTS` only short-circuits
+      // the not-found branch.)
+      val ex = intercept[AnalysisException] {
+        sql(s"DROP TABLE IF EXISTS $fullMetricViewName")
+      }
+      assert(ex.getCondition === "WRONG_COMMAND_FOR_OBJECT_TYPE",
+        s"Expected WRONG_COMMAND_FOR_OBJECT_TYPE, got ${ex.getCondition}: ${ex.getMessage}")
+    }
+  }
+
+  test("SHOW CREATE TABLE on a v2 metric view is unsupported") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+
+      // SHOW CREATE TABLE on a metric view is rejected with the dedicated
+      // UNSUPPORTED_SHOW_CREATE_TABLE.ON_METRIC_VIEW error class (same one the v1 path uses
+      // in `tables.scala`'s `ShowCreateTableCommand`), so the message is identical no matter
+      // which catalog kind owns the view. There's no round-trippable
+      // `CREATE VIEW ... WITH METRICS` form yet, so explicit "unsupported" is the right
+      // answer rather than emitting a misleading plain `CREATE VIEW ...`.
+      val ex = intercept[AnalysisException] {
+        sql(s"SHOW CREATE TABLE $fullMetricViewName")
+      }
+      assert(ex.getCondition === "UNSUPPORTED_SHOW_CREATE_TABLE.ON_METRIC_VIEW",
+        s"Expected UNSUPPORTED_SHOW_CREATE_TABLE.ON_METRIC_VIEW, got " +
+          s"${ex.getCondition}: ${ex.getMessage}")
+      assert(ex.getMessage.contains("metric view"),
+        s"Error message should mention 'metric view', got: ${ex.getMessage}")
+    }
+  }
+
+  test("DROP VIEW IF EXISTS on a non-existent V2 metric view is a no-op") {
+    withTestCatalogTables {
+      sql(s"DROP VIEW IF EXISTS $testCatalogName.$testNamespace.does_not_exist")
+    }
+  }
+
+  test("ALTER VIEW <metric_view> RENAME TO ... succeeds and preserves metric view metadata") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      // Per upstream DataSourceV2SQLSuite convention (see lines 2477 / 2484 there), the
+      // RENAME TO clause takes a 2-part `namespace.name` -- the new ident is implicitly
+      // within the same catalog as the source view. Including a 3-part `catalog.ns.name`
+      // would leak the catalog component into `newName.asIdentifier` and the catalog's
+      // `renameView` would store under a key the loader can't find.
+      val renamedRelative = s"$testNamespace.mv_renamed"
+      val renamedFull = s"$testCatalogName.$renamedRelative"
+      try {
+        // RenameTable on a `ResolvedPersistentView` is routed by `DataSourceV2Strategy` to
+        // `RenameV2ViewExec`, which calls `ViewCatalog.renameView` -- the fixture
+        // `MetricViewRecordingCatalog.renameView` relocates both the `views` entry and the
+        // `capturedViews` entry under the new ident. Pin the wiring end-to-end so the
+        // metric view kind survives the rename.
+        sql(s"ALTER VIEW $fullMetricViewName RENAME TO $renamedRelative")
+
+        // Old ident is gone from the v2 catalog -- DESCRIBE should fail to resolve.
+        val oldEx = intercept[AnalysisException] {
+          sql(s"DESCRIBE TABLE $fullMetricViewName").collect()
+        }
+        assert(oldEx.getCondition === "TABLE_OR_VIEW_NOT_FOUND",
+          s"Expected TABLE_OR_VIEW_NOT_FOUND for the old ident, got " +
+            s"${oldEx.getCondition}: ${oldEx.getMessage}")
+
+        // New ident loads through `TableViewCatalog.loadTableOrView` and surfaces the same
+        // metric-view kind on `DESCRIBE TABLE EXTENDED`.
+        val rows = sql(s"DESCRIBE TABLE EXTENDED $renamedFull").collect()
+        val rowMap = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+        assert(rowMap.get("Type").contains(TableSummary.METRIC_VIEW_TABLE_TYPE),
+          s"Renamed view should still be a METRIC_VIEW, got Type=${rowMap.get("Type")}")
+      } finally {
+        sql(s"DROP VIEW IF EXISTS $renamedFull")
+      }
+    }
+  }
+
+  test("SHOW TABLES on a v2 TableViewCatalog lists both tables and metric views") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      val tables = sql(s"SHOW TABLES IN $testCatalogName.$testNamespace")
+        .collect().map(_.getString(1)).toSet
+      // SPARK-56655 routes SHOW TABLES on a `TableViewCatalog` through `listRelationSummaries`
+      // so views appear alongside tables in the output (matching v1 SHOW TABLES on a session
+      // catalog). Pure `TableCatalog` catalogs continue to return tables only.
+      assert(tables.contains(sourceTableName),
+        s"SHOW TABLES should list the source table, got: $tables")
+      assert(tables.contains(metricViewName),
+        s"SHOW TABLES on a TableViewCatalog should also list metric views, got: $tables")
+    }
+  }
+
+  test("SHOW VIEWS lists v2 metric views") {
+    withTestCatalogTables {
+      val mv = MetricView(
+        "0.1",
+        AssetSource(fullSourceTableName),
+        where = None,
+        select = metricViewColumns)
+      createMetricView(fullMetricViewName, mv)
+      val views = sql(s"SHOW VIEWS IN $testCatalogName.$testNamespace")
+        .collect().map(_.getString(1)).toSet
+      assert(views.contains(metricViewName),
+        s"SHOW VIEWS should list metric views, got: $views")
+    }
+  }
+}
+
+object MetricViewV2CatalogSuite {
+  val noViewCatalogName: String = "testcat_no_view"
+}
+
+/**
+ * Minimal [[TableViewCatalog]] used by [[MetricViewV2CatalogSuite]]. Layers `ViewCatalog`
+ * methods over [[InMemoryTableCatalog]] (which provides table storage + namespace ops) and
+ * captures every [[ViewInfo]] passed to `createView` so tests can inspect the typed payload.
+ *
+ * The metric-view CREATE path goes via `ViewCatalog.createView`, so the captured map keys are
+ * the view identifiers; the source table created by the test fixture is stored separately in
+ * the inherited table catalog.
+ */
+class MetricViewRecordingCatalog extends InMemoryTableCatalog with TableViewCatalog {
+  private val views =
+    new ConcurrentHashMap[(Seq[String], String), ViewInfo]()
+
+  // -- ViewCatalog methods --
+
+  override def listViews(namespace: Array[String]): Array[Identifier] = {
+    val target = namespace.toSeq
+    val out = new java.util.ArrayList[Identifier]()
+    views.forEach { (key, _) =>
+      if (key._1 == target) out.add(Identifier.of(key._1.toArray, key._2))
+    }
+    out.asScala.toArray
+  }
+
+  // `loadView`, `tableExists`, and `viewExists` are inherited from `TableViewCatalog`'s
+  // defaults, which derive from `loadTableOrView` -- a stored `ViewInfo` is wrapped in
+  // `MetadataTable` by `loadTableOrView` and the defaults unwrap it correctly.
+
+  // Bypasses `TableViewCatalog.tableExists` (whose default delegates to `loadTableOrView`,
+  // which checks our `views` map first); we want a tables-only check here so the cross-type
+  // collision branches in `createView` / `replaceView` see only "is there a *table* at this
+  // ident?".
+  private def tableExistsTablesOnly(ident: Identifier): Boolean =
+    try { super[InMemoryTableCatalog].loadTable(ident); true }
+    catch { case _: org.apache.spark.sql.catalyst.analysis.NoSuchTableException => false }
+
+  override def createView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    // TableViewCatalog active-rejection contract: createView must throw
+    // ViewAlreadyExistsException when *either* a view *or* a table sits at the ident.
+    if (tableExistsTablesOnly(ident)) {
+      throw new ViewAlreadyExistsException(ident)
+    }
+    val key = (ident.namespace().toSeq, ident.name())
+    if (views.putIfAbsent(key, info) != null) {
+      throw new ViewAlreadyExistsException(ident)
+    }
+    MetricViewRecordingCatalog.capturedViews.put(ident, info)
+    info
+  }
+
+  override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    // Per the TableViewCatalog contract, replaceView must surface NoSuchViewException
+    // when a *table* sits at the ident (not silently succeed and shadow the table).
+    if (tableExistsTablesOnly(ident)) throw new NoSuchViewException(ident)
+    val key = (ident.namespace().toSeq, ident.name())
+    if (!views.containsKey(key)) throw new NoSuchViewException(ident)
+    views.put(key, info)
+    MetricViewRecordingCatalog.capturedViews.put(ident, info)
+    info
+  }
+
+  override def dropView(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val removed = views.remove(key) != null
+    if (removed) {
+      MetricViewRecordingCatalog.capturedViews.remove(ident)
+    }
+    removed
+  }
+
+  override def renameView(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    val oldKey = (oldIdent.namespace().toSeq, oldIdent.name())
+    val newKey = (newIdent.namespace().toSeq, newIdent.name())
+    val existing = views.get(oldKey)
+    if (existing == null) throw new NoSuchViewException(oldIdent)
+    if (views.putIfAbsent(newKey, existing) != null) {
+      throw new ViewAlreadyExistsException(newIdent)
+    }
+    views.remove(oldKey)
+    val captured = MetricViewRecordingCatalog.capturedViews.remove(oldIdent)
+    if (captured != null) {
+      MetricViewRecordingCatalog.capturedViews.put(newIdent, captured)
+    }
+  }
+
+  // -- TableViewCatalog single-RPC perf path --
+
+  override def loadTableOrView(ident: Identifier): Table = {
+    val key = (ident.namespace().toSeq, ident.name())
+    Option(views.get(key)) match {
+      case Some(info) => new MetadataTable(info, ident.toString)
+      // Bypass `TableViewCatalog.loadTable` (whose default delegates back to `loadTableOrView`)
+      // and call `InMemoryTableCatalog.loadTable` directly to avoid infinite recursion.
+      case None => super[InMemoryTableCatalog].loadTable(ident)
+    }
+  }
+}
+
+object MetricViewRecordingCatalog {
+  // Captures every ViewInfo that flows through createView / replaceView so individual tests
+  // can assert on it. Cleared between tests via `reset()`.
+  val capturedViews: ConcurrentHashMap[Identifier, ViewInfo] =
+    new ConcurrentHashMap[Identifier, ViewInfo]()
+
+  def reset(): Unit = capturedViews.clear()
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala
index a85adc9ebf816..638d69fd7191d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.SQLConfHelper
 import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation
 import org.apache.spark.sql.classic
 import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.util.ThreadUtils
 import org.apache.spark.util.Utils.REDACTION_REPLACEMENT_TEXT
@@ -396,6 +397,30 @@ class SQLExecutionSuite extends SparkFunSuite with SQLConfHelper {
       spark.stop()
     }
   }
+
+  test("SQL execution description should respect spark.sql.redaction.string.regex") {
+    val spark = SparkSession.builder().master("local[*]").appName("test").getOrCreate()
+    try {
+      withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> "password=([^\\s]+)") {
+        var sqlExecutionDescription: String = null
+        spark.sparkContext.addSparkListener(new SparkListener {
+          override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
+            case e: SparkListenerSQLExecutionStart =>
+              sqlExecutionDescription = e.description
+            case _ =>
+          }
+        })
+
+        val sqlStatement = "SELECT 'password=secret123'"
+        spark.sparkContext.setJobDescription(sqlStatement)
+        spark.sql(sqlStatement).collect()
+        spark.sparkContext.listenerBus.waitUntilEmpty()
+        assert(sqlExecutionDescription === s"SELECT '$REDACTION_REPLACEMENT_TEXT")
+      }
+    } finally {
+      spark.stop()
+    }
+  }
 }
 
 object SQLExecutionSuite {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
index 805fff7fa60f2..4362064eb8617 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
@@ -17,7 +17,11 @@
 
 package org.apache.spark.sql.execution
 
+import java.text.SimpleDateFormat
+import java.util.Locale
+
 import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 
 /**
@@ -111,4 +115,248 @@ class SQLFunctionSuite extends SharedSparkSession {
       )
     }
   }
+
+  test("describe SQL scalar functions") {
+    withUserDefinedFunction("foo" -> true, "bar" -> true, "area" -> false) {
+      // Temporary function
+      sql(
+        """
+          |CREATE TEMPORARY FUNCTION foo() RETURNS int
+          |COMMENT 'function foo' RETURN 1
+          |""".stripMargin)
+      checkKeywordsExist(sql("describe function foo"),
+        "Function:", "foo",
+        "Type:", "SCALAR",
+        "Input:", "()",
+        "Returns:", "INT")
+      checkKeywordsExist(sql("describe function extended foo"),
+        "Deterministic: true",
+        "Data Access:", "CONTAINS SQL",
+        "Comment:", "function foo",
+        "Create Time:",
+        "Body:", "1")
+      sql(
+        """
+          |CREATE TEMPORARY FUNCTION bar(x int default 8,
+          |y int default substr('8hello', 1, 1) comment 'var_y')
+          |RETURNS int COMMENT 'function bar' RETURN x + y
+          |""".stripMargin)
+      checkKeywordsExist(sql("describe function bar"),
+        "Function:", "bar",
+        "Input:", "x INT", "y INT",
+        "Returns:", "INT")
+      checkKeywordsExist(sql("describe function extended bar"),
+        "Input:", "x INT DEFAULT 8", "y INT DEFAULT substr('8hello', 1, 1) 'var_y'",
+        "Comment:", "function bar",
+        "Deterministic: true",
+        "Data Access:", "CONTAINS SQL",
+        "Body:", "x + y")
+      // Permanent function
+      val beforeMs = System.currentTimeMillis()
+      sql(
+        """
+          |CREATE FUNCTION area(width double comment 'width', height double comment 'height')
+          |RETURNS double
+          |COMMENT 'compute area'
+          |DETERMINISTIC
+          |RETURN width * height
+          |""".stripMargin)
+      val afterMs = System.currentTimeMillis()
+      checkKeywordsExist(sql("describe function area"),
+        "Function:", "default.area",
+        "Type:", "SCALAR",
+        "Input:", "width  DOUBLE", "height DOUBLE",
+        "Returns:", "DOUBLE")
+      val extendedRows = sql("describe function extended area").collect()
+      checkKeywordsExist(sql("describe function extended area"),
+        "Input:", "width  DOUBLE 'width'", "height DOUBLE 'height'",
+        "Comment:", "compute area",
+        "Deterministic: true",
+        "Data Access:", "CONTAINS SQL",
+        "Create Time:",
+        "Body:", "width * height")
+      // Verify the rendered Create Time falls within a small window around the
+      // CREATE FUNCTION call, i.e. the timestamp set at CREATE time was preserved
+      // (and not silently overwritten by a later cache-build / metadata-load).
+      val createTimeRow = extendedRows.map(_.getString(0))
+        .find(_.startsWith("Create Time:"))
+        .getOrElse(fail("DESCRIBE FUNCTION EXTENDED is missing the Create Time row"))
+      val tsStr = createTimeRow.split("Create Time:", 2)(1).trim
+      // Date.toString() format -- explicit Locale.ENGLISH avoids parser drift on
+      // build hosts whose default locale is not English.
+      val sdf = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", Locale.ENGLISH)
+      val parsedMs = sdf.parse(tsStr).getTime
+      // Date.toString() truncates to seconds; use a 2-second slop on each side.
+      val slopMs = 2000L
+      assert(parsedMs >= beforeMs - slopMs,
+        s"Create Time '$tsStr' is before CREATE FUNCTION (beforeMs=$beforeMs)")
+      assert(parsedMs <= afterMs + slopMs,
+        s"Create Time '$tsStr' is after DESCRIBE FUNCTION (afterMs=$afterMs)")
+    }
+  }
+
+  test("describe SQL table functions") {
+    withUserDefinedFunction("foo" -> false) {
+      sql(
+        """
+          |CREATE FUNCTION foo(x INT) RETURNS TABLE (a INT, b STRING)
+          |COMMENT 'table function foo' RETURN SELECT x, x
+          |""".stripMargin)
+      checkKeywordsExist(sql("describe function foo"),
+        "Function:", "foo",
+        "Type:", "TABLE",
+        "Input:", "x INT",
+        "Returns:", "a INT", "b STRING")
+      checkKeywordsExist(sql("describe function extended foo"),
+        "Comment:", "table function foo",
+        "Deterministic: true",
+        "Data Access:", "CONTAINS SQL",
+        "Create Time:",
+        "Body:", "SELECT x, x")
+    }
+  }
+
+  test("describe SQL functions with derived routine characteristics") {
+    withUserDefinedFunction("foo" -> false, "bar" -> false, "baz" -> false) {
+      withTable("tbl_for_describe") {
+        sql("CREATE TABLE tbl_for_describe AS SELECT 1 AS x")
+        sql("CREATE FUNCTION foo() RETURNS TABLE(x INT) RETURN SELECT * FROM tbl_for_describe")
+        sql("CREATE FUNCTION bar() RETURNS DOUBLE RETURN SELECT SUM(x) + rand() FROM foo()")
+        sql("CREATE FUNCTION baz() RETURNS INT NOT DETERMINISTIC READS SQL DATA RETURN 1")
+        checkKeywordsExist(sql("DESCRIBE FUNCTION EXTENDED foo"),
+          "Deterministic: true",
+          "Data Access:", "READS SQL DATA")
+        checkKeywordsExist(sql("DESCRIBE FUNCTION EXTENDED bar"),
+          "Deterministic: false",
+          "Data Access:", "READS SQL DATA")
+        // Do not overwrite user-specified routine characteristics.
+        checkKeywordsExist(sql("DESCRIBE FUNCTION EXTENDED baz"),
+          "Deterministic: false",
+          "Data Access:", "READS SQL DATA")
+      }
+    }
+  }
+
+  test("SPARK-56639: SQL function uses frozen SQL path") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      withDatabase("path_func_db_a", "path_func_db_b") {
+        withTable("path_func_db_a.frozen_t", "path_func_db_b.frozen_t") {
+          withUserDefinedFunction("frozen_fn" -> false) {
+            sql("USE default")
+            sql("CREATE DATABASE path_func_db_a")
+            sql("CREATE DATABASE path_func_db_b")
+            sql("CREATE TABLE path_func_db_a.frozen_t USING parquet AS SELECT 10 AS id")
+            sql("CREATE TABLE path_func_db_b.frozen_t USING parquet AS SELECT 20 AS id")
+            try {
+              sql("SET PATH = spark_catalog.path_func_db_a, system.builtin")
+              sql(
+                """
+                  |CREATE FUNCTION frozen_fn()
+                  |RETURNS INT
+                  |RETURN (SELECT MAX(id) FROM frozen_t)
+                  |""".stripMargin)
+              sql("SET PATH = spark_catalog.path_func_db_b, system.builtin")
+
+              checkAnswer(sql("SELECT MAX(id) FROM frozen_t"), Row(20))
+              checkAnswer(sql("SELECT default.frozen_fn()"), Row(10))
+              // DESCRIBE FUNCTION EXTENDED renders the frozen creator path,
+              // not the invoker's current PATH. SqlPathFormat.formatForDisplay
+              // back-ticks identifiers only when needed, so plain ASCII
+              // identifiers appear unquoted.
+              checkKeywordsExist(sql("DESCRIBE FUNCTION EXTENDED default.frozen_fn"),
+                "SQL Path:",
+                "spark_catalog.path_func_db_a, system.builtin")
+              checkKeywordsNotExist(sql("DESCRIBE FUNCTION EXTENDED default.frozen_fn"),
+                "path_func_db_b")
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+            }
+          }
+        }
+      }
+    }
+  }
+
+  test("SPARK-56639: SQL table function uses frozen SQL path") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      withDatabase("path_tvf_db_a", "path_tvf_db_b") {
+        withTable("path_tvf_db_a.frozen_t", "path_tvf_db_b.frozen_t") {
+          withUserDefinedFunction("frozen_tvf" -> false) {
+            sql("USE default")
+            sql("CREATE DATABASE path_tvf_db_a")
+            sql("CREATE DATABASE path_tvf_db_b")
+            sql("CREATE TABLE path_tvf_db_a.frozen_t USING parquet AS SELECT 100 AS id")
+            sql("CREATE TABLE path_tvf_db_b.frozen_t USING parquet AS SELECT 200 AS id")
+            try {
+              sql("SET PATH = spark_catalog.path_tvf_db_a, system.builtin")
+              sql(
+                """
+                  |CREATE FUNCTION frozen_tvf()
+                  |RETURNS TABLE(id INT)
+                  |RETURN SELECT MAX(id) AS id FROM frozen_t
+                  |""".stripMargin)
+              sql("SET PATH = spark_catalog.path_tvf_db_b, system.builtin")
+
+              checkAnswer(sql("SELECT MAX(id) FROM frozen_t"), Row(200))
+              checkAnswer(sql("SELECT * FROM default.frozen_tvf()"), Row(100))
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Regression guard: frozen resolution path must not leak into CURRENT_SCHEMA/CURRENT_PATH.
+  test("SPARK-56639: current_schema/current_path in SQL functions use invoker context") {
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      withDatabase("path_ctx_fn_a", "path_ctx_fn_b") {
+        withUserDefinedFunction("path_ctx_fn_a.f_scalar_ctx" -> false,
+          "path_ctx_fn_a.f_table_ctx" -> false) {
+          sql("CREATE DATABASE path_ctx_fn_a")
+          sql("CREATE DATABASE path_ctx_fn_b")
+          try {
+            sql("USE path_ctx_fn_a")
+            sql(
+              """
+                |CREATE FUNCTION path_ctx_fn_a.f_scalar_ctx()
+                |RETURNS STRING
+                |RETURN concat(current_schema(), '::', current_path())
+                |""".stripMargin)
+            sql(
+              """
+                |CREATE FUNCTION path_ctx_fn_a.f_table_ctx()
+                |RETURNS TABLE(cs STRING, cp STRING)
+                |RETURN SELECT current_schema() AS cs, current_path() AS cp
+                |""".stripMargin)
+
+            sql("USE path_ctx_fn_b")
+            sql("SET PATH = DEFAULT_PATH")
+
+            val scalar = sql("SELECT path_ctx_fn_a.f_scalar_ctx()").head().getString(0)
+            assert(scalar.startsWith("path_ctx_fn_b::"),
+              s"Expected scalar function to use invoker current_schema, got: $scalar")
+            assert(scalar.contains("path_ctx_fn_b"),
+              s"Expected scalar function to use invoker current_path, got: $scalar")
+            assert(!scalar.contains("path_ctx_fn_a"),
+              s"Did not expect creator schema in scalar function context, got: $scalar")
+
+            val table = sql("SELECT cs, cp FROM path_ctx_fn_a.f_table_ctx()").head()
+            val tableSchema = table.getString(0)
+            val tablePath = table.getString(1)
+            assert(tableSchema == "path_ctx_fn_b",
+              s"Expected table function to use invoker current_schema, got: $tableSchema")
+            assert(tablePath.contains("path_ctx_fn_b"),
+              s"Expected table function to use invoker current_path, got: $tablePath")
+            assert(!tablePath.contains("path_ctx_fn_a"),
+              s"Did not expect creator schema in table function context, got: $tablePath")
+          } finally {
+            sql("SET PATH = DEFAULT_PATH")
+            sql("USE default")
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index d44737cd2ffd1..3fb54d7c43d58 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution
 import org.apache.spark.{SparkArithmeticException, SparkException}
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.catalyst.expressions.{Add, Alias, Divide}
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.plans.logical.Project
@@ -1409,4 +1410,182 @@ abstract class SQLViewSuite extends QueryTest {
       }
     }
   }
+
+  test("SPARK-56639: permanent view uses frozen SQL path") {
+    withSQLConf(PATH_ENABLED.key -> "true") {
+      withDatabase("path_view_db_a", "path_view_db_b") {
+        withTable("path_view_db_a.frozen_t", "path_view_db_b.frozen_t") {
+          withView("default.v_path_frozen") {
+            sql("USE default")
+            sql("CREATE DATABASE path_view_db_a")
+            sql("CREATE DATABASE path_view_db_b")
+            sql("CREATE TABLE path_view_db_a.frozen_t USING parquet AS SELECT 1 AS id")
+            sql("CREATE TABLE path_view_db_b.frozen_t USING parquet AS SELECT 2 AS id")
+            try {
+              sql("SET PATH = spark_catalog.path_view_db_a, system.builtin")
+              sql("CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t")
+              sql("SET PATH = spark_catalog.path_view_db_b, system.builtin")
+
+              checkAnswer(sql("SELECT id FROM frozen_t"), Row(2))
+              checkAnswer(sql("SELECT id FROM default.v_path_frozen"), Row(1))
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+            }
+          }
+        }
+      }
+    }
+  }
+
+  test("SPARK-56639: malformed persisted view path fails analysis") {
+    withView("default.v_bad_path_metadata") {
+      sql("CREATE VIEW default.v_bad_path_metadata AS SELECT 1 AS id")
+      val ident = TableIdentifier("v_bad_path_metadata", Some("default"))
+      val metadata = spark.sessionState.catalog.getTableMetadata(ident)
+      val corrupted = metadata.copy(
+        properties = metadata.properties + (CatalogTable.VIEW_RESOLUTION_PATH -> "{bad-json"))
+      spark.sessionState.catalog.alterTable(corrupted)
+
+      val e = intercept[AnalysisException] {
+        sql("SELECT * FROM default.v_bad_path_metadata").collect()
+      }
+      assert(e.getMessage.contains("Invalid stored SQL path metadata for view"), e.getMessage)
+    }
+  }
+
+  test("SPARK-56853: stored view path is ignored when PATH is disabled at read time") {
+    // A view created with PATH enabled persists two things in metadata: the frozen
+    // resolution path AND the creator session's current catalog+namespace at CREATE
+    // VIEW time (the view's `viewCatalogAndNamespace` property). If the reader's
+    // session has `spark.sql.path.enabled=false`, the pinned entries are intentionally
+    // dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view body's
+    // unqualified references fall back to that captured catalog+namespace, which is
+    // the creator's USE state at CREATE time -- NOT the schema the view physically
+    // lives in (the two coincide below only because the test runs
+    // `USE spark_catalog.compat_view_b` before CREATE VIEW). Verify both directions:
+    //   - fully-qualified bodies keep working (qualification doesn't depend on PATH),
+    //   - unqualified bodies that relied on the frozen path now resolve via the
+    //     captured viewCatalogAndNamespace.
+    withDatabase("compat_view_a", "compat_view_b") {
+      sql("CREATE DATABASE compat_view_a")
+      sql("CREATE DATABASE compat_view_b")
+      withTable(
+          "compat_view_a.compat_t",
+          "compat_view_b.compat_t") {
+        sql("CREATE TABLE compat_view_a.compat_t USING parquet AS SELECT 1 AS id")
+        sql("CREATE TABLE compat_view_b.compat_t USING parquet AS SELECT 2 AS id")
+        withView(
+            "compat_view_b.v_unq_path",
+            "compat_view_b.v_fq_path") {
+          // Create both views with USE compat_view_b in effect so the stored
+          // viewCatalogAndNamespace points at compat_view_b, then SET PATH=a so the
+          // frozen path pins compat_view_a.
+          withSQLConf(PATH_ENABLED.key -> "true") {
+            try {
+              sql("USE spark_catalog.compat_view_b")
+              sql("SET PATH = spark_catalog.compat_view_a, system.builtin")
+              sql(
+                """
+                  |CREATE VIEW compat_view_b.v_unq_path AS
+                  |SELECT id FROM compat_t
+                  |""".stripMargin)
+              sql(
+                """
+                  |CREATE VIEW compat_view_b.v_fq_path AS
+                  |SELECT id FROM spark_catalog.compat_view_a.compat_t
+                  |""".stripMargin)
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+              sql("USE spark_catalog.default")
+            }
+          }
+
+          // Now read with PATH disabled. The fully-qualified view body is independent of
+          // PATH and must keep returning rows from compat_view_a. The unqualified-body view
+          // drops its frozen-path pin and falls back to viewCatalogAndNamespace
+          // (compat_view_b), so unqualified `compat_t` resolves to compat_view_b.compat_t.
+          withSQLConf(PATH_ENABLED.key -> "false") {
+            checkAnswer(sql("SELECT id FROM compat_view_b.v_fq_path"), Row(1))
+            checkAnswer(sql("SELECT id FROM compat_view_b.v_unq_path"), Row(2))
+          }
+        }
+      }
+    }
+  }
+
+  test("SPARK-56853: stored view path with no fallback target fails clearly when PATH is off") {
+    // Same shape as the previous test, but the captured `viewCatalogAndNamespace`
+    // (the creator's USE state at CREATE VIEW time -- set here via
+    // `USE spark_catalog.compat_home_only`) does NOT contain the unqualified name.
+    // Under PATH disabled the analyzer cannot fall back anywhere, so the lookup
+    // must raise TABLE_OR_VIEW_NOT_FOUND against that captured catalog+namespace.
+    withDatabase("compat_home_only", "compat_referenced") {
+      sql("CREATE DATABASE compat_home_only")
+      sql("CREATE DATABASE compat_referenced")
+      withTable("compat_referenced.only_here") {
+        sql("CREATE TABLE compat_referenced.only_here USING parquet AS SELECT 7 AS id")
+        withView("compat_home_only.v_unq_home") {
+          withSQLConf(PATH_ENABLED.key -> "true") {
+            try {
+              sql("USE spark_catalog.compat_home_only")
+              sql("SET PATH = spark_catalog.compat_referenced, system.builtin")
+              sql(
+                """
+                  |CREATE VIEW compat_home_only.v_unq_home AS
+                  |SELECT id FROM only_here
+                  |""".stripMargin)
+            } finally {
+              sql("SET PATH = DEFAULT_PATH")
+              sql("USE spark_catalog.default")
+            }
+          }
+
+          withSQLConf(PATH_ENABLED.key -> "false") {
+            val e = intercept[AnalysisException] {
+              sql("SELECT id FROM compat_home_only.v_unq_home").collect()
+            }
+            assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" ||
+                e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"),
+              s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}")
+          }
+        }
+      }
+    }
+  }
+
+  // Regression guard: frozen resolution path must not leak into CURRENT_SCHEMA/CURRENT_PATH.
+  test("SPARK-56639: current_schema/current_path in persisted view use invoker context") {
+    withSQLConf(PATH_ENABLED.key -> "true") {
+      withDatabase("path_ctx_view_a", "path_ctx_view_b") {
+        withView("path_ctx_view_a.v_ctx") {
+          sql("CREATE DATABASE path_ctx_view_a")
+          sql("CREATE DATABASE path_ctx_view_b")
+          try {
+            sql("USE path_ctx_view_a")
+            sql(
+              """
+                |CREATE VIEW path_ctx_view_a.v_ctx AS
+                |SELECT current_schema() AS cs, current_path() AS cp
+                |""".stripMargin)
+
+            sql("USE path_ctx_view_b")
+            sql("SET PATH = DEFAULT_PATH")
+            val row = sql("SELECT cs, cp FROM path_ctx_view_a.v_ctx").head()
+            val currentSchema = row.getString(0)
+            val currentPath = row.getString(1)
+
+            assert(currentSchema == "path_ctx_view_b",
+              s"Expected invoker current_schema, got: $currentSchema")
+            assert(currentPath.contains("path_ctx_view_b"),
+              s"Expected invoker current_path to include path_ctx_view_b, got: $currentPath")
+            assert(!currentPath.contains("path_ctx_view_a"),
+              s"Did not expect creator schema in current_path, got: $currentPath")
+          } finally {
+            sql("SET PATH = DEFAULT_PATH")
+            sql("USE default")
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
index f55d4b8cb9e61..1e5b1cdfaeee7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAM
 import org.apache.spark.sql.errors.DataTypeErrors.toSQLId
 import org.apache.spark.sql.internal.SQLConf._
 import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
 import org.apache.spark.util.ArrayImplicits._
 
 /**
@@ -881,163 +881,6 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
     }
   }
 
-  test("Schema evolution views should preserve manually set comments") {
-    withTable("t") {
-      withView("v") {
-        // Create table with comments.
-        sql("CREATE TABLE t (c1 INT COMMENT " +
-          "'table comment 1', c2 STRING COMMENT 'table comment 2')")
-        sql("INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')")
-
-        // Create view with schema evolution (no column list) - initially adopts table comments.
-        sql("CREATE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t")
-
-        // Verify initial comments from table are adopted.
-        val descInitial = sql("DESCRIBE EXTENDED v").collect()
-        val c1CommentInitial = descInitial.filter(r => r.getString(0) == "c1")
-        val c2CommentInitial = descInitial.filter(r => r.getString(0) == "c2")
-        assert(c1CommentInitial.nonEmpty && c1CommentInitial(0).getString(2) == "table comment 1",
-          "Initial c1 comment should be 'table comment 1' from table")
-        assert(c2CommentInitial.nonEmpty && c2CommentInitial(0).getString(2) == "table comment 2",
-          "Initial c2 comment should be 'table comment 2' from table")
-
-        // Simulate user manually changing view comments (via UI or ALTER COLUMN).
-        val catalog = spark.sessionState.catalog
-        val viewMeta = catalog.getTableMetadata(TableIdentifier("v"))
-        val newSchema = StructType(Seq(
-          StructField("c1", IntegerType, nullable = true).withComment("user comment 1"),
-          StructField("c2", StringType, nullable = true).withComment("user comment 2")
-        ))
-        catalog.alterTable(viewMeta.copy(schema = newSchema))
-
-        // Verify manually set comments.
-        val descManual = sql("DESCRIBE EXTENDED v").collect()
-        val c1CommentManual = descManual.filter(r => r.getString(0) == "c1")
-        val c2CommentManual = descManual.filter(r => r.getString(0) == "c2")
-        assert(c1CommentManual.nonEmpty && c1CommentManual(0).getString(2) == "user comment 1",
-          "c1 comment should be 'user comment 1'")
-        assert(c2CommentManual.nonEmpty && c2CommentManual(0).getString(2) == "user comment 2",
-          "c2 comment should be 'user comment 2'")
-
-        // SELECT from view (triggers ViewSyncSchemaToMetaStore).
-        checkAnswer(sql("SELECT * FROM v"), Seq(Row(1, "a"), Row(2, "b"), Row(3, "c")))
-
-        // Verify manually set comments are PRESERVED (not reverted to table comments).
-        val descAfterSelect = sql("DESCRIBE EXTENDED v").collect()
-        val c1CommentAfter = descAfterSelect.filter(r => r.getString(0) == "c1")
-        val c2CommentAfter = descAfterSelect.filter(r => r.getString(0) == "c2")
-        assert(c1CommentAfter.nonEmpty && c1CommentAfter(0).getString(2) == "user comment 1",
-          "c1 comment should still be 'user comment 1' after SELECT (bug: was reverted)")
-        assert(c2CommentAfter.nonEmpty && c2CommentAfter(0).getString(2) == "user comment 2",
-          "c2 comment should still be 'user comment 2' after SELECT (bug: was reverted)")
-
-        // Verify that type changes are still adopted.
-        sql("DROP TABLE t")
-        sql("CREATE TABLE t (c1 BIGINT COMMENT 'table comment changed', " +
-          "c2 DOUBLE COMMENT 'table comment changed 2')")
-        sql("INSERT INTO t VALUES (4, 5.0), (6, 7.0)")
-
-        // SELECT from view - should adopt new types but preserve view comments.
-        checkAnswer(sql("SELECT * FROM v"), Seq(Row(4, 5.0), Row(6, 7.0)))
-
-        // Verify types changed but comments preserved.
-        val descAfterTypeChange = sql("DESCRIBE EXTENDED v").collect()
-        val c1Final = descAfterTypeChange.filter(r => r.getString(0) == "c1")
-        val c2Final = descAfterTypeChange.filter(r => r.getString(0) == "c2")
-        assert(c1Final.nonEmpty && c1Final(0).getString(1) == "bigint",
-          "c1 type should be updated to bigint")
-        assert(c2Final.nonEmpty && c2Final(0).getString(1) == "double",
-          "c2 type should be updated to double")
-        assert(c1Final.nonEmpty && c1Final(0).getString(2) == "user comment 1",
-          "c1 comment should still be 'user comment 1' (preserved)")
-        assert(c2Final.nonEmpty && c2Final(0).getString(2) == "user comment 2",
-          "c2 comment should still be 'user comment 2' (preserved)")
-      }
-    }
-  }
-
-  test("Schema evolution comments legacy behavior with preserveUserComments=false") {
-    withSQLConf(VIEW_SCHEMA_EVOLUTION_PRESERVE_USER_COMMENTS.key -> "false") {
-      withTable("t") {
-        withView("v") {
-          // Create table with comments.
-          sql("CREATE TABLE t (c1 INT COMMENT " +
-            "'table comment 1', c2 STRING COMMENT 'table comment 2')")
-          sql("INSERT INTO t VALUES (1, 'a'), (2, 'b')")
-
-          // Create view with schema evolution.
-          sql("CREATE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t")
-
-          // User manually changes view comments.
-          val catalog = spark.sessionState.catalog
-          val viewMeta = catalog.getTableMetadata(TableIdentifier("v"))
-          val newSchema = StructType(Seq(
-            StructField("c1", IntegerType, nullable = true).withComment("user comment 1"),
-            StructField("c2", StringType, nullable = true).withComment("user comment 2")
-          ))
-          catalog.alterTable(viewMeta.copy(schema = newSchema))
-
-          // Verify manually set comments.
-          val descManual = sql("DESCRIBE EXTENDED v").collect()
-          val c1CommentManual = descManual.filter(r => r.getString(0) == "c1")
-          val c2CommentManual = descManual.filter(r => r.getString(0) == "c2")
-          assert(c1CommentManual.nonEmpty && c1CommentManual(0).getString(2) == "user comment 1")
-          assert(c2CommentManual.nonEmpty && c2CommentManual(0).getString(2) == "user comment 2")
-
-          // SELECT from view (triggers ViewSyncSchemaToMetaStore).
-          checkAnswer(sql("SELECT * FROM v"), Seq(Row(1, "a"), Row(2, "b")))
-
-          // With flag=false, comments should REVERT to table comments (legacy behavior).
-          val descAfterSelect = sql("DESCRIBE EXTENDED v").collect()
-          val c1CommentAfter = descAfterSelect.filter(r => r.getString(0) == "c1")
-          val c2CommentAfter = descAfterSelect.filter(r => r.getString(0) == "c2")
-          assert(c1CommentAfter.nonEmpty && c1CommentAfter(0).getString(2) == "table comment 1",
-            "c1 comment should revert to 'table comment 1' (legacy behavior)")
-          assert(c2CommentAfter.nonEmpty && c2CommentAfter(0).getString(2) == "table comment 2",
-            "c2 comment should revert to 'table comment 2' (legacy behavior)")
-        }
-      }
-    }
-  }
-
-  test("Comments are preserved when table comment changes with preserveUserComments=true") {
-    withTable("t") {
-      withView("v") {
-        // Create table with initial comment.
-        sql("CREATE TABLE t (c1 INT COMMENT 'original table comment')")
-        sql("INSERT INTO t VALUES (1), (2)")
-
-        // Create view with schema evolution - inherits table comment.
-        sql("CREATE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t")
-
-        // Verify view has inherited the table comment.
-        val descInitial = sql("DESCRIBE EXTENDED v").collect()
-        val c1Initial = descInitial.filter(r => r.getString(0) == "c1")
-        assert(c1Initial.nonEmpty && c1Initial(0).getString(2) == "original table comment",
-          "View should inherit table comment")
-
-        // Change the table comment.
-        sql("ALTER TABLE t CHANGE COLUMN c1 c1 INT COMMENT 'new table comment'")
-
-        // Verify table comment changed.
-        val descTable = sql("DESCRIBE EXTENDED t").collect()
-        val c1Table = descTable.filter(r => r.getString(0) == "c1")
-        assert(c1Table.nonEmpty && c1Table(0).getString(2) == "new table comment",
-          "Table comment should be updated")
-
-        // SELECT from view (triggers ViewSyncSchemaToMetaStore).
-        checkAnswer(sql("SELECT * FROM v"), Seq(Row(1), Row(2)))
-
-        // Verify view still has the original inherited comment (frozen).
-        val descAfterSelect = sql("DESCRIBE EXTENDED v").collect()
-        val c1AfterSelect = descAfterSelect.filter(r => r.getString(0) == "c1")
-        assert(c1AfterSelect.nonEmpty &&
-          c1AfterSelect(0).getString(2) == "original table comment",
-          "View should preserve inherited comment even when table comment changes")
-      }
-    }
-  }
-
   def getShowCreateDDL(view: String, serde: Boolean = false): String = {
     val result = if (serde) {
       sql(s"SHOW CREATE TABLE $view AS SERDE")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala
index 991a92dc8976b..b167dd13dcbd6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala
@@ -17,16 +17,26 @@
 
 package org.apache.spark.sql.execution
 
+import java.lang.management.ManagementFactory
+import java.util.concurrent.{CountDownLatch, TimeUnit}
+
+import scala.concurrent.{ExecutionContext, Future}
+import scala.concurrent.duration.Duration
+
 import org.apache.spark.{SparkEnv, SparkException, SparkUnsupportedOperationException}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Literal}
+import org.apache.spark.sql.catalyst.expressions.{
+  Attribute, AttributeReference, Expression, ExprId, Literal}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
 import org.apache.spark.sql.catalyst.plans.logical.Deduplicate
+import org.apache.spark.sql.catalyst.trees.LeafLike
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.IntegerType
+import org.apache.spark.sql.types.{DataType, IntegerType}
 import org.apache.spark.sql.vectorized.ColumnarBatch
+import org.apache.spark.util.ThreadUtils
 
 class SparkPlanSuite extends SharedSparkSession {
 
@@ -168,6 +178,60 @@ class SparkPlanSuite extends SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-57041: waitForSubqueries must not hold the plan's monitor " +
+    "while awaiting subquery results") {
+    val enteredLatch = new CountDownLatch(1)
+    val releaseLatch = new CountDownLatch(1)
+
+    val subqueryExec = TestSubqueryExec(LocalTableScanExec(Nil, Nil, None))
+    val subqueryExpr = BlockingSubquery(subqueryExec, ExprId(0), enteredLatch, releaseLatch)
+    val plan = TestPlanWithSubquery(subqueryExpr)
+
+    val executor = ThreadUtils.newDaemonSingleThreadExecutor("test-wait-for-subqueries")
+    implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(executor)
+
+    plan.testPrepare()
+    val futureA = Future { plan.testWaitForSubqueries() }
+
+    try {
+      assert(enteredLatch.await(10, TimeUnit.SECONDS),
+        "Thread A did not enter updateResult() within 10s")
+
+      val threadB = new Thread(() => plan.synchronized {})
+      threadB.setDaemon(true)
+      threadB.start()
+
+      val bean = ManagementFactory.getThreadMXBean
+      val deadline = System.currentTimeMillis() + 5000L
+      var threadBBlocked = false
+      var waiting = true
+      while (waiting) {
+        if (!threadB.isAlive || System.currentTimeMillis() > deadline) {
+          waiting = false
+        } else {
+          val state = Option(bean.getThreadInfo(threadB.getId)).map(_.getThreadState).orNull
+          if (state == Thread.State.BLOCKED) {
+            threadBBlocked = true
+            waiting = false
+          } else if (state != null) {
+            Thread.sleep(1)
+          }
+        }
+      }
+
+      releaseLatch.countDown()
+      ThreadUtils.awaitResult(futureA, Duration(10, "seconds"))
+      threadB.join(5000L)
+
+      assert(!threadBBlocked,
+        "Deadlock: plan.this.synchronized could not be acquired while waitForSubqueries() was " +
+        "blocking on a subquery future. waitForSubqueries() must not hold the plan's monitor.")
+    } finally {
+      releaseLatch.countDown()
+      executor.shutdown()
+    }
+  }
 }
 
 case class ColumnarOp(child: SparkPlan) extends UnaryExecNode {
@@ -179,3 +243,41 @@ case class ColumnarOp(child: SparkPlan) extends UnaryExecNode {
   override protected def withNewChildInternal(newChild: SparkPlan): ColumnarOp =
     copy(child = newChild)
 }
+
+private case class TestSubqueryExec(child: SparkPlan) extends BaseSubqueryExec {
+  override def name: String = "TestSubqueryExec"
+  override def children: Seq[SparkPlan] = Seq(child)
+  override protected def doExecute(): RDD[InternalRow] = child.execute()
+  override protected def withNewChildrenInternal(
+      newChildren: IndexedSeq[SparkPlan]): TestSubqueryExec = copy(child = newChildren.head)
+}
+
+private case class BlockingSubquery(
+    plan: BaseSubqueryExec,
+    exprId: ExprId,
+    enteredLatch: CountDownLatch,
+    releaseLatch: CountDownLatch)
+    extends ExecSubqueryExpression with LeafLike[Expression] {
+
+  override def dataType: DataType = IntegerType
+  override def nullable: Boolean = true
+  override def eval(input: InternalRow): Any = null
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+    throw new UnsupportedOperationException("test only")
+  override def withNewPlan(plan: BaseSubqueryExec): ExecSubqueryExpression =
+    copy(plan = plan)
+
+  override def updateResult(): Unit = {
+    enteredLatch.countDown()
+    releaseLatch.await(30, TimeUnit.SECONDS)
+  }
+}
+
+private case class TestPlanWithSubquery(subqueryExpr: ExecSubqueryExpression)
+    extends LeafExecNode {
+  override def output: Seq[Attribute] = Nil
+  override protected def doExecute(): RDD[InternalRow] =
+    throw new UnsupportedOperationException("test only")
+  def testPrepare(): Unit = prepare()
+  def testWaitForSubqueries(): Unit = waitForSubqueries()
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnionCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnionCodegenSuite.scala
new file mode 100644
index 0000000000000..835c55fe4c402
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnionCodegenSuite.scala
@@ -0,0 +1,718 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import java.util.concurrent.{CountDownLatch, Executors, TimeUnit}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+
+/**
+ * Tests for `UnionExec` whole-stage codegen fusion: plan-shape assertions,
+ * correctness, type widening, metrics, and fallbacks.
+ */
+class UnionCodegenSuite extends QueryTest with SharedSparkSession {
+
+  // Union codegen fusion is off by default; turn it on for this suite.
+  override protected def sparkConf: SparkConf =
+    super.sparkConf.set(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key, "true")
+
+  // ---------------------------------------------------------------------------
+  // Helpers
+  // ---------------------------------------------------------------------------
+
+  /** Convenience: spark.range returning a DataFrame (not Dataset[Long]). */
+  protected def rangeDF(end: Long): DataFrame = spark.range(end).toDF("id")
+  protected def rangeDF(start: Long, end: Long): DataFrame =
+    spark.range(start, end).toDF("id")
+  protected def rangeDF(start: Long, end: Long, step: Long,
+      numPartitions: Int): DataFrame =
+    spark.range(start, end, step, numPartitions).toDF("id")
+
+  private def wscgCount(df: DataFrame): Int =
+    df.queryExecution.executedPlan.collect {
+      case s: WholeStageCodegenExec => s
+    }.size
+
+  private def unionInsideWSCG(df: DataFrame): Boolean =
+    df.queryExecution.executedPlan.collect {
+      case w: WholeStageCodegenExec if w.find(_.isInstanceOf[UnionExec]).isDefined => w
+    }.nonEmpty
+
+  /** Run query with flag on, then flag off, assert results match. */
+  protected def assertFlagParity(buildDf: () => DataFrame): Unit = {
+    val onRows = buildDf().collect().toSeq
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+      checkAnswer(buildDf(), onRows)
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Configuration smoke
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: SQLConf keys are pinned under wholeStage namespace") {
+    // Pins the user-visible config keys so a future symbol rename does not
+    // silently change the published key string.
+    assert(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key ==
+      "spark.sql.codegen.wholeStage.union.enabled")
+    assert(SQLConf.WHOLESTAGE_UNION_MAX_CHILDREN.key ==
+      "spark.sql.codegen.wholeStage.union.maxChildren")
+  }
+
+  // ---------------------------------------------------------------------------
+  // Plan-shape tests
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: plain union with filter fuses into one WSCG stage") {
+    val df = rangeDF(100).union(rangeDF(100)).filter(col("id") > 0)
+    assert(wscgCount(df) == 1)
+    assert(unionInsideWSCG(df))
+  }
+
+  test("SPARK-56482: flag off restores pre-patch plan shape") {
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+      val df = rangeDF(100).union(rangeDF(100)).filter(col("id") > 0)
+      assert(wscgCount(df) >= 2)
+      assert(!unionInsideWSCG(df))
+    }
+  }
+
+  test("SPARK-56482: maxChildren exceeded falls back") {
+    withSQLConf(
+      SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "true",
+      SQLConf.WHOLESTAGE_UNION_MAX_CHILDREN.key -> "2") {
+      val df = rangeDF(10).union(rangeDF(10)).union(rangeDF(10))
+      assert(!unionInsideWSCG(df))
+    }
+  }
+
+  test("SPARK-56482: nested UnionExec - outer non-codegen, inner codegen") {
+    val inner = rangeDF(10).union(rangeDF(10)).filter(col("id") > 0)
+    val outer = inner.union(rangeDF(10)).filter(col("id") > 0)
+    val plan = outer.queryExecution.executedPlan
+    val fusedOuterUnions = plan.collect {
+      case w: WholeStageCodegenExec
+          if w.find {
+            case u: UnionExec => u.exists {
+              case inner: UnionExec => inner ne u
+              case _ => false
+            }
+            case _ => false
+          }.isDefined => w
+    }
+    assert(fusedOuterUnions.isEmpty,
+      "UnionExec with any descendant UnionExec must not be inside a WSCG stage")
+    assertFlagParity(() => inner.union(rangeDF(10)).filter(col("id") > 0).orderBy("id"))
+  }
+
+  test("SPARK-56482: indirect nested UnionExec behind Project is not fused") {
+    val inner1 = rangeDF(4).union(rangeDF(4))
+    val inner2 = rangeDF(4).union(rangeDF(4))
+    val outer = inner1.select(col("id") + 1 as "id")
+      .union(inner2.select(col("id") + 1 as "id"))
+    val plan = outer.queryExecution.executedPlan
+    val fused = plan.collect {
+      case w: WholeStageCodegenExec if w.find {
+        case u: UnionExec => u.exists {
+          case d: UnionExec => d ne u
+          case _ => false
+        }
+        case _ => false
+      }.isDefined => w
+    }
+    assert(fused.isEmpty,
+      "UnionExec with a non-direct descendant UnionExec must not be fused")
+    assertFlagParity(() =>
+      inner1.select(col("id") + 1 as "id")
+        .union(inner2.select(col("id") + 1 as "id")).orderBy("id"))
+  }
+
+  test("SPARK-56482: non-CodegenSupport child union produces correct results") {
+    // LocalTableScanExec may or may not be fused via InputAdapter wrapping
+    // depending on the planner. Just verify correctness.
+    val schema = StructType(Seq(StructField("id", LongType)))
+    val local = spark.createDataFrame(
+      java.util.Arrays.asList(Row(1L), Row(2L)), schema)
+    val df = local.union(rangeDF(10))
+    assert(df.count() == 12L)
+    assertFlagParity(() => local.union(rangeDF(10)).orderBy("id"))
+  }
+
+  test("SPARK-56482: WSCG count drops from N+1 to 1 (N=4)") {
+    def buildDf(): DataFrame = {
+      val dfs = (0 until 4).map(i => rangeDF(i * 10L, i * 10L + 10L))
+      dfs.reduce((a, b) => a.union(b)).filter(col("id") > 0)
+    }
+    assert(wscgCount(buildDf()) == 1)
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+      assert(wscgCount(buildDf()) >= 2)
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Correctness: type widening
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: type widening int -> long") {
+    assertFlagParity { () =>
+      val a = rangeDF(3).select(col("id").cast(IntegerType).as("v"))
+      val b = rangeDF(3).select(col("id").as("v"))
+      a.union(b).orderBy("v")
+    }
+  }
+
+  test("SPARK-56482: type widening decimal precision (equal scale)") {
+    assertFlagParity { () =>
+      val a = rangeDF(3).select(col("id").cast(DecimalType(5, 0)).as("v"))
+      val b = rangeDF(3).select(col("id").cast(DecimalType(10, 0)).as("v"))
+      a.union(b).orderBy("v")
+    }
+  }
+
+  test("SPARK-56482: type widening decimal precision (different scale)") {
+    // decimal(5,0) union decimal(10,2) -> decimal(10,2) per
+    // DecimalPrecisionTypeCoercion.widerDecimalType (scale=max(0,2)=2,
+    // precision=scale+max(p1-s1,p2-s2)=2+max(5,8)=10). WidenSetOperationTypes
+    // aligns both precision and scale, so the physical UnionExec sees
+    // matching child output dataTypes.
+    val build = () => {
+      val a = rangeDF(3).select(col("id").cast(DecimalType(5, 0)).as("v"))
+      val b = rangeDF(3).select(col("id").cast(DecimalType(10, 2)).as("v"))
+      a.union(b)
+    }
+    assert(unionInsideWSCG(build().filter(col("v") >= 0)),
+      "decimal precision/scale widening should still fuse into one WSCG stage")
+    assertFlagParity(() => build().orderBy("v"))
+  }
+
+  test("SPARK-56482: nullability widening top-level") {
+    assertFlagParity { () =>
+      val a = rangeDF(3).select(col("id").as("v"))
+      val b = spark.createDataFrame(
+        java.util.Arrays.asList(Row(null), Row(1L)),
+        StructType(Seq(StructField("v", LongType, nullable = true))))
+      a.union(b).orderBy("v")
+    }
+  }
+
+  test("SPARK-56482: widening union with filter fuses into one WSCG stage") {
+    // Plan-shape check that fusion is actually taken when types differ at the
+    // user level (forcing `WidenSetOperationTypes` to insert Project(Cast)
+    // above each child). Uses `.filter` rather than `.orderBy` so the plan
+    // has no Exchange and AQE does not wrap it.
+    val a = rangeDF(3).select(col("id").cast(IntegerType).as("v"))
+    val b = rangeDF(3).select(col("id").as("v"))
+    val df = a.union(b).filter(col("v") >= 0)
+    assert(unionInsideWSCG(df),
+      "widened-children Union should fuse with filter into a single WSCG stage")
+    checkAnswer(df, Seq(Row(0L), Row(1L), Row(2L), Row(0L), Row(1L), Row(2L)))
+  }
+
+  test("SPARK-56482: nested-nullability mismatch falls back to non-codegen") {
+    // Children differ only in nested struct nullability, which
+    // `WidenSetOperationTypes` does not align (see `allChildOutputDataTypesMatch`
+    // in `UnionExec`). The codegen path must fall back to `doExecute` rather
+    // than crash on the resulting type mismatch.
+    val structInner = StructType(Seq(StructField("f", IntegerType, nullable = false)))
+    val structOuterNotNull = StructType(Seq(StructField("s", structInner, nullable = false)))
+    val structInnerNullable =
+      StructType(Seq(StructField("f", IntegerType, nullable = true)))
+    val structOuterNullable =
+      StructType(Seq(StructField("s", structInnerNullable, nullable = false)))
+    val a = spark.createDataFrame(
+      java.util.Arrays.asList(Row(Row(1)), Row(Row(2))), structOuterNotNull)
+    val b = spark.createDataFrame(
+      java.util.Arrays.asList(Row(Row(3)), Row(Row(4))), structOuterNullable)
+    val df = a.union(b)
+    assert(!unionInsideWSCG(df),
+      "Nested-nullability mismatch must fall back to non-codegen")
+    val unionExec = df.queryExecution.executedPlan.collectFirst {
+      case u: UnionExec => u
+    }.get
+    assert(!unionExec.metrics.contains("numOutputRows"),
+      "numOutputRows metric must not be registered when fusion is denied")
+    checkAnswer(df,
+      Seq(Row(Row(1)), Row(Row(2)), Row(Row(3)), Row(Row(4))))
+  }
+
+  test("SPARK-56482: array containsNull mismatch falls back to non-codegen") {
+    // ArrayType.containsNull is the array analog of struct field nullability:
+    // skipped by `WidenSetOperationTypes`, so the codegen path must fall back.
+    val schemaNotNull =
+      StructType(Seq(StructField("a", ArrayType(IntegerType, containsNull = false))))
+    val schemaNullable =
+      StructType(Seq(StructField("a", ArrayType(IntegerType, containsNull = true))))
+    val a = spark.createDataFrame(
+      java.util.Arrays.asList(Row(java.util.Arrays.asList(1, 2))), schemaNotNull)
+    val b = spark.createDataFrame(
+      java.util.Arrays.asList(Row(java.util.Arrays.asList(3, 4))), schemaNullable)
+    val df = a.union(b)
+    assert(!unionInsideWSCG(df),
+      "Array containsNull mismatch must fall back to non-codegen")
+    val unionExec = df.queryExecution.executedPlan.collectFirst {
+      case u: UnionExec => u
+    }.get
+    assert(!unionExec.metrics.contains("numOutputRows"),
+      "numOutputRows metric must not be registered when fusion is denied")
+    val collectedArrays = df.collect()
+      .map(_.getList[Int](0).toArray.toSeq)
+      .toSet
+    assert(collectedArrays == Set(Seq(1, 2), Seq(3, 4)),
+      s"Expected the union of both array rows, got $collectedArrays")
+  }
+
+  // ---------------------------------------------------------------------------
+  // Correctness: N children, empty partitions, mixed partition counts
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: N = 3 children") {
+    assertFlagParity { () =>
+      val a = rangeDF(3)
+      val b = rangeDF(3, 6)
+      val c = rangeDF(6, 9)
+      a.union(b).union(c).orderBy("id")
+    }
+  }
+
+  test("SPARK-56482: N = 8 children") {
+    assertFlagParity { () =>
+      val dfs = (0 until 8).map(i => rangeDF(i * 5L, i * 5L + 5L))
+      dfs.reduce((a, b) => a.union(b)).orderBy("id")
+    }
+  }
+
+  test("SPARK-56482: empty-partition child") {
+    assertFlagParity { () =>
+      val a = rangeDF(0, 0, 1, numPartitions = 4)
+      val b = rangeDF(3)
+      a.union(b).orderBy("id")
+    }
+  }
+
+  test("SPARK-56482: mixed partition counts") {
+    assertFlagParity { () =>
+      val a = rangeDF(0, 10, 1, numPartitions = 2)
+      val b = rangeDF(10, 30, 1, numPartitions = 5)
+      a.union(b).orderBy("id")
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Correctness: RangeExec under fusion (the partitionIndex fix)
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: range union fuses correctly (childPartitionIndex)") {
+    assertFlagParity { () =>
+      rangeDF(0, 10, 1, numPartitions = 2)
+        .union(rangeDF(10, 20, 1, numPartitions = 2))
+        .orderBy("id")
+    }
+  }
+
+  test("SPARK-56482: range(2).union(range(2)) returns 4 rows") {
+    val df = rangeDF(2).union(rangeDF(2))
+    assert(df.count() == 4)
+    assert(df.collect().map(_.getLong(0)).sorted.toSeq == Seq(0, 0, 1, 1))
+  }
+
+  test("SPARK-56482: three RangeExec children fuse correctly") {
+    assertFlagParity { () =>
+      val a = rangeDF(0, 5, 1, numPartitions = 2)
+      val b = rangeDF(5, 10, 1, numPartitions = 3)
+      val c = rangeDF(10, 15, 1, numPartitions = 2)
+      a.union(b).union(c).orderBy("id")
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Metrics
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: numOutputRows metric equals total child rows") {
+    val df = rangeDF(3).union(rangeDF(5))
+    df.collect()
+    val unionExec = df.queryExecution.executedPlan.collectFirst {
+      case u: UnionExec => u
+    }.get
+    assert(unionExec.metrics("numOutputRows").value == 8L)
+  }
+
+  test("SPARK-56482: numOutputRows with mixed partition counts") {
+    val a = rangeDF(0, 40, 1, numPartitions = 4)
+    val b = rangeDF(0, 200, 1, numPartitions = 2)
+    val df = a.union(b)
+    df.collect()
+    val unionExec = df.queryExecution.executedPlan.collectFirst {
+      case u: UnionExec => u
+    }.get
+    assert(unionExec.metrics("numOutputRows").value == 240L)
+  }
+
+  // ---------------------------------------------------------------------------
+  // LIMIT
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: LIMIT above Union returns exactly K rows") {
+    assertFlagParity { () =>
+      val a = rangeDF(100)
+      val b = rangeDF(100, 200)
+      a.union(b).limit(5)
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // needCopyResult
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: needCopyResult all-scan children is false") {
+    val df = rangeDF(10).union(rangeDF(10))
+    df.collect()
+    val unionExec = df.queryExecution.executedPlan.collectFirst {
+      case u: UnionExec => u
+    }.get
+    assert(!unionExec.needCopyResult,
+      "UnionExec with scan-only children should not need row copy")
+  }
+
+  test("SPARK-56482: BHJ child union correctness") {
+    // Verify that a union with a BHJ child produces correct results
+    // under both flag states (the needCopyResult override ensures
+    // UnsafeRow buffers aren't aliased across multi-row BHJ output).
+    withSQLConf(
+      SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "true",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10485760") {
+      val left = rangeDF(100).select(col("id").as("lk"), col("id").as("lv"))
+      val right = rangeDF(100).select(col("id").as("rk"))
+      val bhj = left.join(broadcast(right), col("lk") === col("rk"))
+        .select("lk", "lv")
+      val df = bhj.union(
+        rangeDF(100).select(col("id").as("lk"), col("id").as("lv")))
+      val flagOn = df.collect().toSeq
+      withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+        checkAnswer(df, flagOn)
+      }
+    }
+  }
+
+  test("SPARK-56482: BHJ multi-row child feeds downstream agg correctly") {
+    withSQLConf(
+      SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "true",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10485760") {
+      val probe = rangeDF(10).select(col("id").as("k"))
+      val build = rangeDF(20)
+        .select((col("id") % 5).as("k"), col("id").as("v"))
+      val bhj = probe.join(broadcast(build), "k")
+      val df = bhj.union(
+        rangeDF(0).select(col("id").as("k"), col("id").as("v")))
+      val agg = df.groupBy("k").count().orderBy("k")
+      val flagOn = agg.collect()
+      withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+        checkAnswer(agg, flagOn.toSeq)
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Structural denylist: SortMergeJoin child
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: SMJ child union correctness") {
+    // SMJ is in the structural denylist (multi-RDD codegen), so
+    // UnionExec should fall back for that child. Verify correctness.
+    withSQLConf(
+      SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "true",
+      SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      val left = rangeDF(100).select(col("id").as("k"))
+      val right = rangeDF(100).select(col("id").as("k"))
+      val smj = left.join(right, "k")
+      val df = smj.union(rangeDF(100).select(col("id").as("k")))
+      val flagOn = df.collect().toSeq
+      withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+        checkAnswer(df, flagOn)
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Columnar fallback
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: parquet union correctness") {
+    // Verify unions of Parquet-backed DataFrames produce correct results
+    // regardless of columnar/row mode and codegen flag state.
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      rangeDF(100).write.parquet(path)
+      val df = spark.read.parquet(path).union(spark.read.parquet(path))
+      assert(df.count() == 200L)
+      val flagOn = df.orderBy("id").collect().toSeq
+      withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+        checkAnswer(df.orderBy("id"), flagOn)
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Cached DataFrame child
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: cached DataFrame child correctness across flag states") {
+    val cached = rangeDF(100).cache()
+    try {
+      cached.count()
+      val df = cached.union(rangeDF(100, 200))
+      val flagOn = df.collect().toSet
+      withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+        assert(df.collect().toSet == flagOn)
+      }
+    } finally {
+      cached.unpersist()
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Reused subquery
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: reused subquery across Union children") {
+    val t = "union_codegen_sub_test"
+    rangeDF(100).createOrReplaceTempView(t)
+    try {
+      val q =
+        s"""
+           |SELECT id FROM $t WHERE id IN (SELECT MAX(id) FROM $t)
+           |UNION ALL
+           |SELECT id FROM $t WHERE id IN (SELECT MAX(id) FROM $t)
+         """.stripMargin
+      val flagOn = spark.sql(q).collect().toSet
+      withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+        assert(spark.sql(q).collect().toSet == flagOn)
+      }
+    } finally {
+      spark.catalog.dropTempView(t)
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // storeAssignmentPolicy regression guard
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: storeAssignmentPolicy regression guard") {
+    Seq("LEGACY", "STRICT", "ANSI").foreach { policy =>
+      withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> policy) {
+        val df = rangeDF(3).union(rangeDF(3, 6))
+        assert(df.collect().map(_.get(0).asInstanceOf[Long]).toSet == (0L to 5L).toSet,
+          s"policy=$policy")
+      }
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Very large N (fallback via hugeMethodLimit)
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: over-cap falls back to per-child stages") {
+    // Explicit cap so the assertion is robust to future default changes.
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_MAX_CHILDREN.key -> "16") {
+      val n = 32
+      val dfs = (0 until n).map(i => rangeDF(i.toLong, i.toLong + 1L))
+      val unioned = dfs.reduce((x, y) => x.union(y))
+      assert(unioned.count() == n.toLong)
+      assert(!unionInsideWSCG(unioned))
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Runtime toggle
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: flag flip takes effect across QueryExecutions") {
+    def buildDf(): DataFrame =
+      rangeDF(100).union(rangeDF(100)).filter(col("id") > 0)
+
+    assert(wscgCount(buildDf()) == 1)
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+      assert(wscgCount(buildDf()) >= 2)
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // supportCodegenFailureReason branch coverage
+  // ---------------------------------------------------------------------------
+
+  test("SPARK-56482: Nondeterministic child causes codegen fallback") {
+    // rand() is Nondeterministic; union fusion should be denied
+    val a = rangeDF(10).select(col("id"), rand(42).as("r"))
+    val b = rangeDF(10).select(col("id"), rand(43).as("r"))
+    val df = a.union(b)
+    assert(!unionInsideWSCG(df),
+      "Union with Nondeterministic child must not be inside WSCG")
+    // Verify correctness despite fallback
+    assertFlagParity(() => a.union(b).orderBy("id"))
+  }
+
+  test("SPARK-56482: monotonically_increasing_id child causes codegen fallback") {
+    val a = rangeDF(10).select(col("id"), monotonically_increasing_id().as("mid"))
+    val b = rangeDF(10).select(col("id"), monotonically_increasing_id().as("mid"))
+    val df = a.union(b)
+    assert(!unionInsideWSCG(df),
+      "Union with monotonically_increasing_id child must not be inside WSCG")
+  }
+
+  test("SPARK-56482: column pruning works under union codegen (usedInputs=empty)") {
+    // Union of 2-column children, parent selects only 1 column
+    val a = rangeDF(10).select(col("id"), (col("id") * 2).as("v"))
+    val b = rangeDF(10, 20).select(col("id"), (col("id") * 3).as("v"))
+    val df = a.union(b).select("id").orderBy("id")
+    val flagOn = df.collect()
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+      checkAnswer(df, flagOn.toSeq)
+    }
+  }
+
+  test("SPARK-56482: numOutputRows with empty union children") {
+    val a = rangeDF(0, 0, 1, numPartitions = 2)
+    val b = rangeDF(0, 0, 1, numPartitions = 3)
+    val df = a.union(b)
+    df.collect()
+    val unionExec = df.queryExecution.executedPlan.collectFirst {
+      case u: UnionExec => u
+    }
+    // UnionExec may or may not exist depending on optimizer elimination
+    unionExec.foreach { u =>
+      assert(u.metrics("numOutputRows").value == 0L,
+        "numOutputRows should be 0 for all-empty union")
+    }
+  }
+
+  test("SPARK-56482: partitioning-aware union falls back to non-codegen") {
+    // After repartition, both children expose a `HashPartitioning` on the same key,
+    // so `UnionExec.outputPartitioning` is non-Unknown and the codegen path is denied.
+    // AQE is disabled here so the executedPlan exposes the UnionExec directly
+    // (under AQE the plan is wrapped in `AdaptiveSparkPlanExec`, which does not
+    // surface its inputPlan via `children`).
+    withSQLConf(
+      SQLConf.UNION_OUTPUT_PARTITIONING.key -> "true",
+      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      val a = rangeDF(100).repartition(4, col("id"))
+      val b = rangeDF(100, 200).repartition(4, col("id"))
+      val df = a.union(b)
+      assert(!unionInsideWSCG(df),
+        "Partitioning-aware union must not fuse into WSCG")
+      val unionExec = df.queryExecution.executedPlan.collectFirst {
+        case u: UnionExec => u
+      }.get
+      assert(!unionExec.metrics.contains("numOutputRows"),
+        "numOutputRows metric must not be registered on the partitioning-aware path")
+      assertFlagParity(() => a.union(b).orderBy("id"))
+    }
+  }
+
+  test("SPARK-56482: input_file_name child fuses (Nondeterministic but partition-index-free)") {
+    // `InputFileName` is `Nondeterministic` but reads from `InputFileBlockHolder`
+    // (a per-task thread-local) and does not embed `partitionIndex`. The gate's
+    // narrow check should let this fuse.
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      rangeDF(20).write.parquet(path)
+      val a = spark.read.parquet(path).select(col("id"), input_file_name().as("f"))
+      val b = spark.read.parquet(path).select(col("id"), input_file_name().as("f"))
+      val df = a.union(b).filter(col("id") > 0)
+      assert(unionInsideWSCG(df),
+        "Union with input_file_name child should fuse into WSCG")
+      assertFlagParity(() => a.union(b).orderBy("id", "f"))
+    }
+  }
+
+  test("SPARK-56482: union with sample children fuses (or falls back) without crashing") {
+    // `SampleExec.doConsume` reads `currentPartitionIndexVar` from inside an
+    // `addMutableState` initializer, which is emitted into the state-init
+    // function rather than the per-child helper. The bound expression must
+    // therefore resolve in any emission scope, not just inside the helper.
+    val a = rangeDF(20).sample(false, 0.5, 1L)
+    val b = rangeDF(20).sample(false, 0.5, 1L)
+    val df = a.union(b).filter(col("id") > 0)
+    df.collect()
+    assertFlagParity(() => a.union(b).orderBy("id"))
+  }
+
+  test("SPARK-57196: concurrent codegen of a shared UnionExec stage is thread-safe") {
+    // A single `UnionExec` instance can have its whole-stage codegen driven by
+    // more than one thread at a time: a reused exchange/subquery is generated
+    // concurrently with the main plan, and async subquery/DPP execution can
+    // overlap a driver-side `doCodeGen`. The fusion path kept per-emission state
+    // (`currentEmittingChild`) in a mutable field on the shared instance, so a
+    // racing `doProduce` could reset it to -1 while another thread was still in
+    // `doConsume`, tripping the "UnionExec.doConsume invoked outside doProduce
+    // emission window" requirement. Generating the same fused stage from many
+    // threads reproduces the race.
+    val df = rangeDF(100).union(rangeDF(100)).filter(col("id") > 0)
+    assert(unionInsideWSCG(df))
+    val wscg = df.queryExecution.executedPlan.collectFirst {
+      case w: WholeStageCodegenExec if w.find(_.isInstanceOf[UnionExec]).isDefined => w
+    }.getOrElse(fail("expected a fused UnionExec stage"))
+
+    val numThreads = 8
+    val iterations = 200
+    val pool = Executors.newFixedThreadPool(numThreads)
+    val errors = java.util.Collections.synchronizedList(new java.util.ArrayList[Throwable]())
+    try {
+      val startLatch = new CountDownLatch(1)
+      val futures = (0 until numThreads).map { _ =>
+        pool.submit(new Runnable {
+          override def run(): Unit = {
+            startLatch.await()
+            var n = 0
+            while (n < iterations) {
+              try {
+                wscg.doCodeGen()
+              } catch {
+                case t: Throwable => errors.add(t)
+              }
+              n += 1
+            }
+          }
+        })
+      }
+      startLatch.countDown()
+      futures.foreach(_.get(60, TimeUnit.SECONDS))
+    } finally {
+      pool.shutdownNow()
+    }
+    assert(errors.isEmpty,
+      "concurrent doCodeGen on a shared UnionExec stage raced:\n" +
+        errors.toArray.map(_.toString).mkString("\n"))
+  }
+}
+
+/** Runs [[UnionCodegenSuite]] with ANSI mode enabled. */
+class UnionCodegenAnsiSuite extends UnionCodegenSuite {
+  override protected def sparkConf: SparkConf =
+    super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "true")
+}
+
+/** Runs [[UnionCodegenSuite]] with adaptive query execution enabled. */
+class UnionCodegenAqeSuite extends UnionCodegenSuite {
+  override protected def sparkConf: SparkConf =
+    super.sparkConf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true")
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index 069bfc72e106e..a83d5c99bb5d1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -777,6 +777,29 @@ class WholeStageCodegenSuite extends SharedSparkSession
     }
   }
 
+  test("SPARK-56134: Codegen working for empty output") {
+    // Create a balanced tree of AND conditions. This prevents generating a very deep tree,
+    // which can cause stack overflow.
+    def balancedAnd(cols: Seq[String]): String = cols match {
+      case Seq(single) => single
+      case seq =>
+        val (left, right) = seq.splitAt(seq.length / 2)
+        balancedAnd(left) + " and " + balancedAnd(right)
+    }
+
+    withTempPath { dir =>
+        val path = dir.getCanonicalPath
+        sql("select array(0) as value from range(0, 1, 1, 1)")
+          .write.mode(SaveMode.Overwrite).parquet(path)
+
+        val numConditions = 1000
+        val conditions = (0 until numConditions).map(i => s"value <= array($i)")
+        val condition = balancedAnd(conditions)
+        val df = spark.read.parquet(path).filter(condition).selectExpr()
+        assert(df.limit(1).selectExpr("count(*)").collect() === Array(Row(1)))
+    }
+  }
+
   test("SPARK-25767: Lazy evaluated stream of expressions handled correctly") {
     val a = Seq(1).toDF("key")
     val b = Seq((1, "a")).toDF("key", "value")
@@ -1163,4 +1186,89 @@ class WholeStageCodegenSuite extends SharedSparkSession
       }
     }
   }
+
+  test("SPARK-56032: FilterExec skips CSE codegen when there is no common subexpression") {
+    // When otherPreds share no common subexpression, the CSE codegen path provides no benefit
+    // but would still eagerly evaluate every referenced input column at the top of the row loop
+    // (the inputVarsEvalCode prologue), defeating the lazy, short-circuiting column loads of the
+    // non-CSE path. Verify that with CSE enabled we fall back to the exact same generated code as
+    // with CSE disabled, so no column is decoded for rows an earlier predicate would reject.
+    val schema = StructType(Seq(
+      StructField("a", IntegerType, nullable = true),
+      StructField("b", IntegerType, nullable = true)))
+    val data = spark.sparkContext.parallelize(Seq(
+      Row(1, 5), Row(null, 3), Row(4, null), Row(5, 6), Row(7, 8), Row(2, 3)))
+    val expected = Seq(Row(5, 6), Row(7, 8))
+
+    def filterCode(cseEnabled: Boolean): String = {
+      withSQLConf(
+        SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> cseEnabled.toString,
+        SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+        val df = spark.createDataFrame(data, schema)
+        // `a > 4` and `b > 4` reference different columns and share no subexpression.
+        val filtered = df.where("a IS NOT NULL AND a > 4 AND b > 4")
+        val plan = filtered.queryExecution.executedPlan
+        assert(plan.exists(_.isInstanceOf[WholeStageCodegenExec]),
+          "Filter should be in whole-stage codegen")
+        checkAnswer(filtered, expected)
+        codegenString(plan)
+      }
+    }
+
+    // Each `createDataFrame` mints fresh attribute exprIds (e.g. `a#16`), which appear in the
+    // plan-tree header of the codegen dump but not in the generated Java. Normalize them away so
+    // the comparison reflects the generated code, not the id counter.
+    def normalize(code: String): String = code.replaceAll("#\\d+", "#")
+    assert(normalize(filterCode(cseEnabled = true)) == normalize(filterCode(cseEnabled = false)),
+      "With no common subexpression, CSE-enabled FilterExec codegen should be identical to " +
+        "CSE-disabled codegen (i.e. fall back to the lazy, short-circuiting non-CSE path)")
+  }
+
+  test("SPARK-56032: subexpressionElimination.filterExec.enabled gates FilterExec CSE " +
+    "independently of subexpression elimination") {
+    // The conf disables CSE specifically for FilterExec while leaving subexpression elimination
+    // enabled elsewhere. With a genuine common subexpression in the predicates, turning the conf
+    // off should make FilterExec fall back to the lazy non-CSE path (re-evaluating the shared
+    // subexpression per use), matching the code generated when CSE is globally disabled.
+    val schema = StructType(Seq(
+      StructField("a", DayTimeIntervalType(), nullable = true),
+      StructField("b", DayTimeIntervalType(), nullable = true)))
+    val data = spark.sparkContext.parallelize(Seq(
+      Row(Duration.ofDays(1), Duration.ofDays(5)),
+      Row(Duration.ofDays(5), Duration.ofDays(6)),
+      Row(Duration.ofDays(2), Duration.ofDays(3))))
+    val expected = data.collect().toSeq
+
+    // `a + b` appears three times in the predicate, so it is a CSE candidate. We count `addExact`
+    // occurrences in the generated code: the CSE path evaluates it once, the lazy path per use.
+    def filterCode(filterExecCseEnabled: Boolean): String = {
+      withSQLConf(
+        // Subexpression elimination stays globally on; only the FilterExec gate flips.
+        SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true",
+        SQLConf.SUBEXPRESSION_ELIMINATION_FILTER_EXEC_ENABLED.key ->
+          filterExecCseEnabled.toString,
+        SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+        val df = spark.createDataFrame(data, schema)
+        val filtered = df.where(
+          "a IS NOT NULL AND (a + b) > INTERVAL '3' DAY " +
+            "AND (a + b) < INTERVAL '15' DAY AND (a + b) != INTERVAL '10' DAY")
+        val plan = filtered.queryExecution.executedPlan
+        assert(plan.exists(_.isInstanceOf[WholeStageCodegenExec]),
+          "Filter should be in whole-stage codegen")
+        checkAnswer(filtered, expected)
+        codegenString(plan)
+      }
+    }
+
+    val addExactPattern = "addExact".r
+    val enabledCount = addExactPattern.findAllIn(filterCode(filterExecCseEnabled = true)).length
+    val disabledCount = addExactPattern.findAllIn(filterCode(filterExecCseEnabled = false)).length
+    // With the gate on, CSE collapses the repeated `a + b` evaluations; with the gate off,
+    // FilterExec falls back to the lazy path that re-evaluates per use.
+    assert(enabledCount < disabledCount,
+      s"subexpressionElimination.filterExec.enabled should reduce repeated evaluation: " +
+        s"addExact appears $enabledCount times when enabled vs $disabledCount times when disabled")
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
index b2c7745d0a9ab..50322905f29f3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
@@ -29,9 +29,10 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListe
 import org.apache.spark.shuffle.sort.SortShuffleManager
 import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, EqualTo, IsNull, Or}
 import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.{Inner, LeftAnti}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Join, JoinHint, LocalRelation, LogicalPlan}
 import org.apache.spark.sql.classic.Strategy
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.aggregate.BaseAggregateExec
@@ -40,7 +41,7 @@ import org.apache.spark.sql.execution.command.DataWritingCommandExec
 import org.apache.spark.sql.execution.datasources.noop.NoopDataSource
 import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec
 import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ENSURE_REQUIREMENTS, Exchange, REPARTITION_BY_COL, REPARTITION_BY_NUM, ReusedExchangeExec, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin}
-import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec}
+import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, HashedRelationBroadcastMode, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec}
 import org.apache.spark.sql.execution.metric.SQLShuffleReadMetricsReporter
 import org.apache.spark.sql.execution.streaming.runtime.{MemoryStream, StreamingQueryWrapper}
 import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider
@@ -1634,6 +1635,47 @@ class AdaptiveQueryExecSuite
     }
   }
 
+  test("LogicalQueryStageStrategy keeps hashed broadcast modes separate") {
+    val left = LocalRelation(AttributeReference("l", IntegerType)())
+    val right = LocalRelation(AttributeReference("r", IntegerType)())
+
+    def broadcastStage(plan: LocalRelation, isNullAware: Boolean): LogicalQueryStage = {
+      val scan = LocalTableScanExec(plan.output, Nil, None)
+      val exchange = BroadcastExchangeExec(
+        HashedRelationBroadcastMode(plan.output, isNullAware), scan)
+      LogicalQueryStage(plan, BroadcastQueryStageExec(0, exchange, exchange))
+    }
+
+    val equiJoin = Join(
+      broadcastStage(left, isNullAware = false),
+      right,
+      Inner,
+      Some(EqualTo(left.output.head, right.output.head)),
+      JoinHint.NONE)
+    assert(LogicalQueryStageStrategy(equiJoin).head.isInstanceOf[BroadcastHashJoinExec])
+
+    val equiJoinWithNullAwareStage = equiJoin.copy(
+      left = broadcastStage(left, isNullAware = true))
+    assert(LogicalQueryStageStrategy(equiJoinWithNullAwareStage).isEmpty)
+
+    val naajCondition = Or(
+      EqualTo(left.output.head, right.output.head),
+      IsNull(EqualTo(left.output.head, right.output.head)))
+    val nullAwareAntiJoin = Join(
+      left,
+      broadcastStage(right, isNullAware = true),
+      LeftAnti,
+      Some(naajCondition),
+      JoinHint.NONE)
+    val naaj = LogicalQueryStageStrategy(nullAwareAntiJoin).head
+      .asInstanceOf[BroadcastHashJoinExec]
+    assert(naaj.isNullAwareAntiJoin)
+
+    val nullAwareAntiJoinWithRegularStage = nullAwareAntiJoin.copy(
+      right = broadcastStage(right, isNullAware = false))
+    assert(LogicalQueryStageStrategy(nullAwareAntiJoinWithRegularStage).isEmpty)
+  }
+
   test("SPARK-32717: AQEOptimizer should respect excludedRules configuration") {
     withSQLConf(
       SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowCompressionUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowCompressionUtilsSuite.scala
new file mode 100644
index 0000000000000..50a980dcf37a9
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowCompressionUtilsSuite.scala
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.arrow
+
+import java.io.ByteArrayOutputStream
+import java.nio.channels.Channels
+
+import org.apache.arrow.vector.{VarCharVector, VectorSchemaRoot, VectorUnloader}
+import org.apache.arrow.vector.compression.NoCompressionCodec
+import org.apache.arrow.vector.ipc.WriteChannel
+import org.apache.arrow.vector.ipc.message.MessageSerializer
+
+import org.apache.spark.{SparkException, SparkFunSuite}
+import org.apache.spark.sql.types.{StringType, StructField, StructType}
+import org.apache.spark.sql.util.ArrowUtils
+
+class ArrowCompressionUtilsSuite extends SparkFunSuite {
+
+  // Serializes one Arrow record batch compressed at the given zstd level and returns its size.
+  private def compressedSize(level: Int): Int = {
+    val sparkSchema = StructType(Seq(StructField("str_col", StringType)))
+    val arrowSchema = ArrowUtils.toArrowSchema(sparkSchema, "UTC", true, false)
+    val allocator =
+      ArrowUtils.rootAllocator.newChildAllocator(this.getClass.getSimpleName, 0, Long.MaxValue)
+    val root = VectorSchemaRoot.create(arrowSchema, allocator)
+    try {
+      root.allocateNew()
+      val strVector = root.getVector("str_col").asInstanceOf[VarCharVector]
+      // Compressible but non-trivial corpus: shared structure with per-row variation, so
+      // different zstd levels produce measurably different output sizes.
+      (0 until 2000).foreach { i =>
+        val value =
+          s"user-$i@example.com,record-${i % 97},payload-${i * 2654435761L}".getBytes("UTF-8")
+        strVector.setSafe(i, value, 0, value.length)
+      }
+      root.setRowCount(2000)
+      val codec = ArrowCompressionUtils.createCompressionCodec("zstd", level)
+      val recordBatch = new VectorUnloader(root, true, codec, true).getRecordBatch()
+      try {
+        val out = new ByteArrayOutputStream()
+        MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), recordBatch)
+        out.size()
+      } finally {
+        recordBatch.close()
+      }
+    } finally {
+      root.close()
+      allocator.close()
+    }
+  }
+
+  test("SPARK-57383: zstd compression level is honored by createCompressionCodec") {
+    // Regression test: the codec used to be rebuilt through the single-argument factory
+    // overload, which silently dropped the level, so every configured level compressed at the
+    // zstd default. Compress the same batch at an ultra-fast negative level and at a high level
+    // and assert the high level yields a strictly smaller payload.
+    val fastSize = compressedSize(-5)
+    val highSize = compressedSize(19)
+    assert(highSize < fastSize,
+      s"zstd level 19 should compress smaller than level -5, " +
+        s"got level 19 -> $highSize bytes vs level -5 -> $fastSize bytes; " +
+        "equal sizes mean the configured level is being ignored")
+  }
+
+  test("codec name 'none' maps to the no-op codec and unknown names fail") {
+    assert(ArrowCompressionUtils.createCompressionCodec("none", 3) ===
+      NoCompressionCodec.INSTANCE)
+    val e = intercept[SparkException] {
+      ArrowCompressionUtils.createCompressionCodec("snappy", 3)
+    }
+    assert(e.getMessage.contains("Unsupported Arrow compression codec: snappy"))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
index c3e9af54d431e..eb826a9e23570 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.util.{Geography => InternalGeography, Geome
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.ArrowUtils
 import org.apache.spark.sql.vectorized._
-import org.apache.spark.unsafe.types.{CalendarInterval, GeographyVal, GeometryVal, UTF8String}
+import org.apache.spark.unsafe.types.{BinaryView, CalendarInterval, UTF8String}
 import org.apache.spark.util.MaybeNull
 
 class ArrowWriterSuite extends SparkFunSuite {
@@ -61,8 +61,8 @@ class ArrowWriterSuite extends SparkFunSuite {
 
       val dataModified = data.map { datum =>
         dt match {
-          case _: GeometryType => datum.asInstanceOf[GeometryVal].getBytes
-          case _: GeographyType => datum.asInstanceOf[GeographyVal].getBytes
+          case _: GeometryType => datum.asInstanceOf[BinaryView].getBytes
+          case _: GeographyType => datum.asInstanceOf[BinaryView].getBytes
           case _ => datum
         }
       }
@@ -89,8 +89,8 @@ class ArrowWriterSuite extends SparkFunSuite {
             case _: YearMonthIntervalType => reader.getInt(rowId)
             case _: DayTimeIntervalType => reader.getLong(rowId)
             case CalendarIntervalType => reader.getInterval(rowId)
-            case _: GeometryType => reader.getGeometry(rowId).getBytes
-            case _: GeographyType => reader.getGeography(rowId).getBytes
+            case _: GeometryType => reader.getBinaryView(rowId).getBytes
+            case _: GeographyType => reader.getBinaryView(rowId).getBytes
           }
           assert(value === datum)
       }
@@ -105,15 +105,23 @@ class ArrowWriterSuite extends SparkFunSuite {
     }
 
     val geographies = wkbs.map(x => InternalGeography.fromWkb(x, 4326).getValue)
+    val geographies4267 = wkbs.map(x => InternalGeography.fromWkb(x, 4267).getValue)
+    val geographies4269 = wkbs.map(x => InternalGeography.fromWkb(x, 4269).getValue)
     val geometries = wkbs.map(x => InternalGeometry.fromWkb(x, 0).getValue)
     val mixedGeometries = wkbs.zip(Seq(0, 4326)).map {
       case (g, srid) => InternalGeometry.fromWkb(g, srid).getValue
     }
+    val mixedGeographies = wkbs.zip(Seq(4267, 4269)).map {
+      case (g, srid) => InternalGeography.fromWkb(g, srid).getValue
+    }
 
     check(GeometryType(0), geometries)
     check(GeographyType(4326), geographies)
+    check(GeographyType(4267), geographies4267)
+    check(GeographyType(4269), geographies4269)
     check(GeometryType("ANY"), mixedGeometries)
     check(GeographyType("ANY"), geographies)
+    check(GeographyType("ANY"), mixedGeographies)
     check(BooleanType, Seq(true, null, false))
     check(ByteType, Seq(1.toByte, 2.toByte, null, 4.toByte))
     check(ShortType, Seq(1.toShort, 2.toShort, null, 4.toShort))
@@ -175,27 +183,27 @@ class ArrowWriterSuite extends SparkFunSuite {
           assert(expectedStruct.getInt(0) === actualStruct.getInt(0))
           assert(expectedStruct.getInt(2) === actualStruct.getInt(2))
 
-          if (expectedStruct.getGeography(1) == null ||
-            actualStruct.getGeography(1) == null) {
-            assert(expectedStruct.getGeography(1) == null && actualStruct.getGeography(1) == null)
+          if (expectedStruct.getBinaryView(1) == null ||
+            actualStruct.getBinaryView(1) == null) {
+            assert(expectedStruct.getBinaryView(1) == null && actualStruct.getBinaryView(1) == null)
           } else {
-            assert(expectedStruct.getGeography(1).getBytes ===
-              actualStruct.getGeography(1).getBytes)
+            assert(expectedStruct.getBinaryView(1).getBytes ===
+              actualStruct.getBinaryView(1).getBytes)
           }
-          if (expectedStruct.getGeography(3) == null ||
-            actualStruct.getGeography(3) == null) {
-            assert(expectedStruct.getGeography(3) == null && actualStruct.getGeography(3) == null)
+          if (expectedStruct.getBinaryView(3) == null ||
+            actualStruct.getBinaryView(3) == null) {
+            assert(expectedStruct.getBinaryView(3) == null && actualStruct.getBinaryView(3) == null)
           } else {
-            assert(expectedStruct.getGeography(3).getBytes ===
-              actualStruct.getGeography(3).getBytes)
+            assert(expectedStruct.getBinaryView(3).getBytes ===
+              actualStruct.getBinaryView(3).getBytes)
           }
 
           if (datum.getArray(1) == null ||
             internalRow.getArray(1) == null) {
             assert(internalRow.getArray(1) == null && datum.getArray(1) == null)
           } else {
-            internalRow.getArray(1).toSeq[GeographyVal](GeographyType(4326))
-              .zip(datum.getArray(1).toSeq[GeographyVal](GeographyType(4326))).foreach {
+            internalRow.getArray(1).toSeq[BinaryView](GeographyType(4326))
+              .zip(datum.getArray(1).toSeq[BinaryView](GeographyType(4326))).foreach {
                 case (actual, expected) =>
                   assert(actual.getBytes === expected.getBytes)
               }
@@ -207,8 +215,8 @@ class ArrowWriterSuite extends SparkFunSuite {
           } else {
             assert(internalRow.getMap(2).keyArray().toSeq(StringType) ===
               datum.getMap(2).keyArray().toSeq(StringType))
-            internalRow.getMap(2).valueArray().toSeq[GeographyVal](GeographyType("ANY"))
-              .zip(datum.getMap(2).valueArray().toSeq[GeographyVal](GeographyType("ANY"))).foreach {
+            internalRow.getMap(2).valueArray().toSeq[BinaryView](GeographyType("ANY"))
+              .zip(datum.getMap(2).valueArray().toSeq[BinaryView](GeographyType("ANY"))).foreach {
                 case (actual, expected) =>
                   assert((actual == null && expected == null) ||
                     actual.getBytes === expected.getBytes)
@@ -293,27 +301,27 @@ class ArrowWriterSuite extends SparkFunSuite {
           assert(expectedStruct.getInt(0) === actualStruct.getInt(0))
           assert(expectedStruct.getInt(2) === actualStruct.getInt(2))
 
-          if (expectedStruct.getGeometry(1) == null ||
-            actualStruct.getGeometry(1) == null) {
-            assert(expectedStruct.getGeometry(1) == null && actualStruct.getGeometry(1) == null)
+          if (expectedStruct.getBinaryView(1) == null ||
+            actualStruct.getBinaryView(1) == null) {
+            assert(expectedStruct.getBinaryView(1) == null && actualStruct.getBinaryView(1) == null)
           } else {
-            assert(expectedStruct.getGeometry(1).getBytes ===
-              actualStruct.getGeometry(1).getBytes)
+            assert(expectedStruct.getBinaryView(1).getBytes ===
+              actualStruct.getBinaryView(1).getBytes)
           }
-          if (expectedStruct.getGeometry(3) == null ||
-            actualStruct.getGeometry(3) == null) {
-            assert(expectedStruct.getGeometry(3) == null && actualStruct.getGeometry(3) == null)
+          if (expectedStruct.getBinaryView(3) == null ||
+            actualStruct.getBinaryView(3) == null) {
+            assert(expectedStruct.getBinaryView(3) == null && actualStruct.getBinaryView(3) == null)
           } else {
-            assert(expectedStruct.getGeometry(3).getBytes ===
-              actualStruct.getGeometry(3).getBytes)
+            assert(expectedStruct.getBinaryView(3).getBytes ===
+              actualStruct.getBinaryView(3).getBytes)
           }
 
           if (datum.getArray(1) == null ||
             internalRow.getArray(1) == null) {
             assert(internalRow.getArray(1) == null && datum.getArray(1) == null)
           } else {
-            internalRow.getArray(1).toSeq[GeometryVal](GeometryType(0))
-              .zip(datum.getArray(1).toSeq[GeometryVal](GeometryType(0))).foreach {
+            internalRow.getArray(1).toSeq[BinaryView](GeometryType(0))
+              .zip(datum.getArray(1).toSeq[BinaryView](GeometryType(0))).foreach {
                 case (actual, expected) =>
                   assert(actual.getBytes === expected.getBytes)
               }
@@ -325,8 +333,8 @@ class ArrowWriterSuite extends SparkFunSuite {
           } else {
             assert(internalRow.getMap(2).keyArray().toSeq(StringType) ===
               datum.getMap(2).keyArray().toSeq(StringType))
-            internalRow.getMap(2).valueArray().toSeq[GeometryVal](GeometryType("ANY"))
-              .zip(datum.getMap(2).valueArray().toSeq[GeometryVal](GeometryType("ANY"))).foreach {
+            internalRow.getMap(2).valueArray().toSeq[BinaryView](GeometryType("ANY"))
+              .zip(datum.getMap(2).valueArray().toSeq[BinaryView](GeometryType("ANY"))).foreach {
                 case (actual, expected) =>
                   assert((actual == null && expected == null) ||
                     actual.getBytes === expected.getBytes)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AnalyzerBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AnalyzerBenchmark.scala
new file mode 100644
index 0000000000000..141d5966b4b6c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AnalyzerBenchmark.scala
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import scala.concurrent.duration._
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.classic
+import org.apache.spark.sql.execution.QueryExecution
+
+/**
+ * Benchmark to measure the overhead of cloning the analyzer for transactional query execution.
+ * Each transactional query creates a new [[Analyzer]] instance via
+ * [[Analyzer.withCatalogManager]], which shares all rules with the original but carries a
+ * per-query [[org.apache.spark.sql.connector.catalog.CatalogManager]]. This benchmark checks
+ * whether the cloning introduces measurable overhead.
+ *
+ * To run this benchmark:
+ * {{{
+ *   build/sbt "sql/Test/runMain <this class>"
+ * }}}
+ */
+object AnalyzerBenchmark extends SqlBasedBenchmark {
+
+  private val numRows = 100
+  private val queries = Seq(
+    "simple select"     -> "SELECT id, val FROM t1",
+    "join"              -> "SELECT t1.id, t2.val FROM t1 JOIN t2 ON t1.id = t2.id",
+    "wide schema"       -> s"SELECT ${(1 to 100).map(i => s"col_$i").mkString(", ")} FROM wide_t"
+  )
+
+  private def setupTables(): Unit = {
+    spark.range(numRows).selectExpr("id", "id * 2 as val").createOrReplaceTempView("t1")
+    spark.range(numRows).selectExpr("id", "id * 3 as val").createOrReplaceTempView("t2")
+    spark.range(numRows)
+      .selectExpr((1 to numRows).map(i => s"id as col_$i"): _*)
+      .createOrReplaceTempView("wide_t")
+  }
+
+  /**
+   * Measures analysis time for a pre-parsed plan, comparing the session analyzer against a
+   * cloned analyzer created via [[Analyzer.withCatalogManager]].
+   *
+   * Two cases:
+   *  - "session analyzer"           : baseline, uses the session analyzer directly.
+   *  - "cloned analyzer (per query)": analyzer cloned every iteration; reflects the full
+   *                                   per-transactional-query cost (clone + analysis).
+   */
+  def analysisBenchmark(): Unit = {
+    for ((name, sql) <- queries) {
+      runBenchmark(s"analysis overhead $name") {
+        val benchmark = new Benchmark(
+          name = s"analysis overhead $name",
+          // Per row measurements are not meaningful here.
+          valuesPerIteration = numRows,
+          minTime = 10.seconds,
+          output = output)
+        val catalogManager = spark.sessionState.catalogManager
+
+        benchmark.addCase("session analyzer") { _ =>
+          val plan = spark.sessionState.sqlParser.parsePlan(sql)
+          new QueryExecution(spark.asInstanceOf[classic.SparkSession], plan).analyzed
+        }
+
+        benchmark.addCase("cloned analyzer (per query)") { _ =>
+          val cloned = spark.sessionState.analyzer.withCatalogManager(catalogManager)
+          val plan = spark.sessionState.sqlParser.parsePlan(sql)
+          new QueryExecution(spark.asInstanceOf[classic.SparkSession],
+            plan, analyzerOpt = Some(cloned)).analyzed
+        }
+
+        benchmark.run()
+      }
+    }
+  }
+
+  /**
+   * Micro-benchmark for [[Analyzer.withCatalogManager]] in isolation: measures the cost of
+   * instantiating the anonymous [[Analyzer]] subclass, independent of analysis work.
+   */
+  def cloneCostBenchmark(): Unit = {
+    runBenchmark("analyzer clone cost") {
+      val numRows = 1 // Per row measurements are not meaningful here.
+      val benchmark = new Benchmark(
+        name = "analyzer clone cost",
+        valuesPerIteration = numRows,
+        output = output)
+      val catalogManager = spark.sessionState.catalogManager
+
+      benchmark.addCase("withCatalogManager") { _ =>
+        spark.sessionState.analyzer.withCatalogManager(catalogManager)
+      }
+
+      benchmark.run()
+    }
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    setupTables()
+    cloneCostBenchmark()
+    analysisBenchmark()
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MergeRowsExecBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MergeRowsExecBenchmark.scala
index 8ddbca46b7396..0fcac326d923d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MergeRowsExecBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MergeRowsExecBenchmark.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.benchmark
 
+import scala.concurrent.duration._
+
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GreaterThan, IsNotNull, Literal}
 import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
@@ -43,6 +45,18 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
 
   private val N = 20 << 20
 
+  /** Longer warm-up and timed window for stable interpreted (whole-stage off) results. */
+  private def mergeRowsBenchmark(name: String, cardinality: Long)(f: => Unit): Unit = {
+    codegenBenchmark(
+      name,
+      cardinality,
+      warmupTime = 7.seconds,
+      minTime = 7.seconds,
+      minNumIters = 3,
+      wholestageOffNumIters = 0,
+      wholestageOnNumIters = 0)(f)
+  }
+
   /**
    * Creates a DataFrame simulating the join output from a MERGE operation.
    *
@@ -110,7 +124,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
       a(0), a(5), a(6), a(3)
     )))
 
-    codegenBenchmark("merge - matched update only", N) {
+    mergeRowsBenchmark("merge - matched update only", N) {
       val df = buildMergeRowsDF(inputDF, matchedInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
@@ -126,7 +140,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
       a(4), a(5), a(6), a(7)
     )))
 
-    codegenBenchmark("merge - not matched insert only", N) {
+    mergeRowsBenchmark("merge - not matched insert only", N) {
       val df = buildMergeRowsDF(inputDF, Seq.empty, notMatchedInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
@@ -144,7 +158,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
       a(4), a(5), a(6), a(7)
     )))
 
-    codegenBenchmark("merge - matched update + not matched insert", N) {
+    mergeRowsBenchmark("merge - matched update + not matched insert", N) {
       val df = buildMergeRowsDF(inputDF, matchedInstr, notMatchedInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
@@ -156,7 +170,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
 
     val matchedInstr = Seq(Discard(TrueLiteral))
 
-    codegenBenchmark("merge - matched delete", N) {
+    mergeRowsBenchmark("merge - matched delete", N) {
       val df = buildMergeRowsDF(inputDF, matchedInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
@@ -177,7 +191,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
       Keep(Insert, GreaterThan(a(5), Literal(500)), Seq(a(4), a(5), a(6), a(7)))
     )
 
-    codegenBenchmark("merge - conditional clauses", N) {
+    mergeRowsBenchmark("merge - conditional clauses", N) {
       val df = buildMergeRowsDF(inputDF, matchedInstr, notMatchedInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
@@ -199,7 +213,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
     )))
     val notMatchedBySourceInstr = Seq(Discard(TrueLiteral))
 
-    codegenBenchmark("merge - matched + not matched + not matched by source", N) {
+    mergeRowsBenchmark("merge - matched + not matched + not matched by source", N) {
       val df = buildMergeRowsDF(inputDF, matchedInstr, notMatchedInstr, notMatchedBySourceInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
@@ -216,7 +230,7 @@ object MergeRowsExecBenchmark extends SqlBasedBenchmark with ClassicConversions
       Seq(a(0), a(5), a(6), a(3))
     ))
 
-    codegenBenchmark("merge - split update (delete + insert)", N) {
+    mergeRowsBenchmark("merge - split update (delete + insert)", N) {
       val df = buildMergeRowsDF(inputDF, matchedInstr)
       assert(df.queryExecution.sparkPlan.exists(_.isInstanceOf[MergeRowsExec]))
       df.noop()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala
index 78d6b01580355..6c60721599bbb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.benchmark
 
+import scala.concurrent.duration._
+
 import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 import org.apache.spark.internal.config.MAX_RESULT_SIZE
 import org.apache.spark.internal.config.UI.UI_ENABLED
@@ -46,17 +48,42 @@ trait SqlBasedBenchmark extends BenchmarkBase with SQLHelper {
       .getOrCreate()
   }
 
-  /** Runs function `f` with whole stage codegen on and off. */
-  final def codegenBenchmark(name: String, cardinality: Long)(f: => Unit): Unit = {
-    val benchmark = new Benchmark(name, cardinality, output = output)
+  /**
+   * Runs function `f` with whole stage codegen on and off.
+   *
+   * @param minNumIters minimum timed iterations per case when the corresponding
+   *        `wholestageOffNumIters` or `wholestageOnNumIters` is zero.
+   * @param warmupTime JIT warm-up duration per case before timed iterations.
+   * @param minTime minimum total timed duration per case when the corresponding
+   *        `wholestageOffNumIters` or `wholestageOnNumIters` is zero.
+   * @param wholestageOffNumIters if non-zero, run exactly this many timed iterations
+   *        for the wholestage-off case; otherwise use `minNumIters` and `minTime`.
+   * @param wholestageOnNumIters if non-zero, run exactly this many timed iterations
+   *        for the wholestage-on case; otherwise use `minNumIters` and `minTime`.
+   */
+  final def codegenBenchmark(
+      name: String,
+      cardinality: Long,
+      minNumIters: Int = 2,
+      warmupTime: FiniteDuration = 2.seconds,
+      minTime: FiniteDuration = 2.seconds,
+      wholestageOffNumIters: Int = 2,
+      wholestageOnNumIters: Int = 5)(f: => Unit): Unit = {
+    val benchmark = new Benchmark(
+      name,
+      cardinality,
+      minNumIters = minNumIters,
+      warmupTime = warmupTime,
+      minTime = minTime,
+      output = output)
 
-    benchmark.addCase(s"$name wholestage off", numIters = 2) { _ =>
+    benchmark.addCase(s"$name wholestage off", numIters = wholestageOffNumIters) { _ =>
       withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") {
         f
       }
     }
 
-    benchmark.addCase(s"$name wholestage on", numIters = 5) { _ =>
+    benchmark.addCase(s"$name wholestage on", numIters = wholestageOnNumIters) { _ =>
       withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
         f
       }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnionBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnionBenchmark.scala
new file mode 100644
index 0000000000000..ea9dac094d433
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UnionBenchmark.scala
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.{DataFrame, SparkSession}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.IntegerType
+
+/**
+ * Benchmark Union codegen fusion.
+ *
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt:
+ *      bin/spark-submit --class <this class>
+ *        --jars <sql/core test jar>,<spark catalyst jar> <sql/core test jar>
+ *   2. build/sbt "sql/Test/runMain <this class>"
+ *   3. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
+ *      Results will be written to "benchmarks/UnionBenchmark-results.txt".
+ * }}}
+ */
+object UnionBenchmark extends SqlBasedBenchmark {
+
+  // Multi-core master is required: with `local[1]`, each `Range` child produces
+  // a single partition, `UnionExec.outputPartitioning` collapses to
+  // `SinglePartition`, and fusion is denied (it applies only on the
+  // `UnknownPartitioning` path). Use `local[4]` so children have multiple
+  // partitions and the Union ends up on the fusable path.
+  override def getSparkSession: SparkSession = {
+    SparkSession.builder()
+      .master("local[4]")
+      .appName(this.getClass.getCanonicalName)
+      .config(SQLConf.SHUFFLE_PARTITIONS.key, 4)
+      .config("spark.ui.enabled", false)
+      .getOrCreate()
+  }
+
+  // Plain children: each child is a bare Range, all children share the same type.
+  private def buildPlainQuery(n: Int): DataFrame = {
+    val dfs = (0 until n).map(i =>
+      spark.range(i * 10000L, i * 10000L + 10000L).toDF("id"))
+    dfs.reduce((a, b) => a.union(b)).filter(col("id") > 0)
+  }
+
+  // Type widening: alternate int and long children so UnionExec needs to
+  // insert `Cast` nodes in `perChildProjections`, adding per-row work in
+  // each helper method.
+  private def buildWideningQuery(n: Int): DataFrame = {
+    val dfs = (0 until n).map { i =>
+      val df = spark.range(i * 10000L, i * 10000L + 10000L).toDF("id")
+      if (i % 2 == 0) df.select(col("id").cast(IntegerType).as("id"))
+      else df
+    }
+    dfs.reduce((a, b) => a.union(b)).filter(col("id") > 0)
+  }
+
+  // Fatter helpers: each child has its own per-child filter + projection,
+  // which inflates the generated helper method body and stresses HotSpot
+  // inlining/compilation.
+  private def buildPerChildOpsQuery(n: Int): DataFrame = {
+    val dfs = (0 until n).map { i =>
+      spark.range(i * 10000L, i * 10000L + 10000L).toDF("id")
+        .filter(col("id") > i.toLong)
+        .select((col("id") + 1).as("id"))
+    }
+    dfs.reduce((a, b) => a.union(b)).filter(col("id") > 0)
+  }
+
+  // TPC-DS-style shape: union of per-channel projections followed by
+  // grouping aggregation. Matches Q2 (wswscs + weekly sums), Q5 (per-channel
+  // rollups), and similar ETL queries where UNION ALL feeds an aggregate.
+  private def buildDownstreamAggQuery(n: Int): DataFrame = {
+    val dfs = (0 until n).map(i =>
+      spark.range(i * 10000L, i * 10000L + 10000L).toDF("id"))
+    dfs.reduce((a, b) => a.union(b))
+      .groupBy((col("id") % 10).as("bucket"))
+      .count()
+  }
+
+  private def runScenario(
+      name: String,
+      build: Int => DataFrame,
+      ns: Seq[Int]): Unit = {
+    ns.foreach { n =>
+      runBenchmark(s"$name (N=$n)") {
+        val benchmark = new Benchmark(
+          s"$name, N=$n",
+          valuesPerIteration = 10000L * n,
+          output = output)
+
+        benchmark.addCase(s"codegen=off") { _ =>
+          withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
+            build(n).noop()
+          }
+        }
+
+        benchmark.addCase(s"codegen=on") { _ =>
+          withSQLConf(
+            SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "true",
+            SQLConf.WHOLESTAGE_UNION_MAX_CHILDREN.key -> "8192") {
+            build(n).noop()
+          }
+        }
+
+        benchmark.run()
+      }
+    }
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    val ns = Seq(2, 4, 8, 16, 32, 64, 128, 256, 512, 1024)
+    runScenario("Union plain", buildPlainQuery, ns)
+    runScenario("Union type widening", buildWideningQuery, ns)
+    runScenario("Union per-child ops", buildPerChildOpsQuery, ns)
+    runScenario("Union + downstream aggregate", buildDownstreamAggQuery, ns)
+  }
+
+  private def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
+    val saved = pairs.map { case (k, _) => k -> spark.conf.getOption(k) }
+    try {
+      pairs.foreach { case (k, v) => spark.conf.set(k, v) }
+      f
+    } finally {
+      saved.foreach {
+        case (k, Some(v)) => spark.conf.set(k, v)
+        case (k, None) => spark.conf.unset(k)
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WindowBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WindowBenchmark.scala
new file mode 100644
index 0000000000000..2284ff037e839
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WindowBenchmark.scala
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import scala.collection.mutable
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Benchmark for window functions with bounded ROWS frames.
+ *
+ * Matrix (see PR description for rationale):
+ *   - A: 5 aggregates x 3 cells (naive / segtree default / segtree bs=256) @ W=1001.
+ *     Per-case N so naive ~3-5s/iter; STDDEV_SAMP pinned @ N=2M (multi-buffer stress).
+ *   - B: SUM-over-INT, W sweep {10, 50, 201, 4001}; W=10/50 Pareto-loss stress,
+ *     W=4001 also runs bs=256.
+ *   - F: Spill guard, 1M String x MAX x W=1001 (stress).
+ *   - C: N-sweep {2M, 8M, 16M} segtree-only @ W=1001 (memory-pressure invariance).
+ *
+ * Compare Per Row(ns) column for O(log W) scaling. Dev smoke via positional
+ * mainArgs: (0)=rowCount, (1)=halfWindow (default 100); do NOT combine with
+ * SPARK_GENERATE_BENCHMARK_FILES=1.
+ */
+object WindowBenchmark extends SqlBasedBenchmark {
+
+  // Section A: per-case N calibrated so naive baseline lands ~3-5s/iter.
+  private val A_N_INT: Long = 256L * 1024              // MIN/MAX/SUM/COUNT @ W=1001
+  private val A_N_AVG: Long = 192L * 1024              // AVG  @ W=1001
+  private val A_N_STDDEV: Long = 2L * 1000L * 1000L    // STDDEV stress
+
+  // Section B: W-sweep (W=10/50 stress: Pareto loss zone; W=4001 stress: O(W) cliff).
+  private val B_N_W10: Long = 2L * 1000L * 1000L
+  private val B_N_W50: Long = 2L * 1000L * 1000L
+  private val B_N_W201: Long = 1L * 1000L * 1000L
+  private val B_N_W4001: Long = 2L * 1000L * 1000L
+
+  // Section F: String spill.
+  private val SPILL_N: Long = 1L * 1000L * 1000L
+
+  // Section C: N-sweep segtree-only.
+  private val C_N_SMALL: Long = 2L * 1000L * 1000L
+  private val C_N_MID: Long = 8L * 1000L * 1000L
+  private val C_N_LARGE: Long = 16L * 1000L * 1000L
+  private val C_HALF_W: Int = 500                      // W=1001
+
+  private val MAIN_HALF_W: Int = 500                   // Section A W=1001
+
+  private val ITERS_NORMAL: Int = 5
+  private val ITERS_STRESS: Int = 3
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    val smokeMode = mainArgs.nonEmpty
+    val smokeRowCount = if (smokeMode) mainArgs(0).toLong else 0L
+    val smokeHalfW = if (mainArgs.length > 1) mainArgs(1).toInt else 100
+
+    if (smokeMode) {
+      require(smokeRowCount >= 4096,
+        s"rowCount=$smokeRowCount too small; segtree may fallback. Use >= 4096.")
+    }
+
+    // Shared metrics listener: per-case peak mem + disk spill.
+    val metrics = mutable.Map[String, (Long, Long)]()
+    @volatile var currentCase: String = ""
+    val listener = new SparkListener {
+      override def onTaskEnd(e: SparkListenerTaskEnd): Unit = {
+        val m = e.taskMetrics
+        val cc = currentCase
+        if (m != null && cc.nonEmpty) {
+          metrics.synchronized {
+            val (pm, ds) = metrics.getOrElse(cc, (0L, 0L))
+            metrics(cc) = (math.max(pm, m.peakExecutionMemory), ds + m.diskBytesSpilled)
+          }
+        }
+      }
+    }
+    spark.sparkContext.addSparkListener(listener)
+    val allCaseNames = mutable.ArrayBuffer[String]()
+
+    def setupIntTable(n: Long): Unit = {
+      spark.range(n)
+        .selectExpr("id", "cast(rand(42) * 1000000 as int) as v")
+        .coalesce(1)
+        .createOrReplaceTempView("t")
+    }
+
+    def setupStringTable(n: Long): Unit = {
+      // ~20-char variable-length string; exercises spill path.
+      spark.range(n)
+        .selectExpr("id", "repeat(cast(id as string), 5) as v")
+        .coalesce(1)
+        .createOrReplaceTempView("t")
+    }
+
+    def frameFor(halfW: Int): String =
+      s"OVER (ORDER BY id ROWS BETWEEN $halfW PRECEDING AND $halfW FOLLOWING)"
+
+    // FP-digest trap: segtree merge order differs from row-by-row SlidingWindow
+    // order for AVG/STDDEV/VAR, so results differ at the ULP level (mathematically
+    // equivalent). HASH(double) is bit-sensitive, and HASH(ROUND(m, k)) is a trap:
+    // rows within ~1 ULP of a rounding bin boundary round differently across
+    // backends, and SUM of hashes amplifies tiny FP diffs into large digest drift
+    // (observed: 0.2% digest diff on STDDEV_SAMP @ N=2M even with per-row rel err
+    // <1e-10). Use SUM(CAST(ROUND(m, 3) * 1000 AS BIGINT)): one boundary-crossing
+    // row only contributes +/-1, so ULP-identical impls always agree while real
+    // bugs >1e-3 rel err are still caught. Integer aggregates remain bit-exact.
+    def digestExprFor(aggFn: String): String = {
+      if (aggFn.startsWith("AVG") || aggFn.startsWith("STDDEV") ||
+          aggFn.startsWith("VAR")) {
+        "CAST(ROUND(m, 3) * 1000 AS BIGINT)"
+      } else {
+        "HASH(m)"
+      }
+    }
+
+    def digest(aggFn: String, frame: String, sqlConfs: (String, String)*): Long = {
+      val expr = digestExprFor(aggFn)
+      withSQLConf(sqlConfs: _*) {
+        spark.sql(s"SELECT SUM($expr) FROM (SELECT $aggFn(v) $frame AS m FROM t)")
+          .head().getLong(0)
+      }
+    }
+
+    def rowsLabel(rows: Long): String = {
+      if (rows >= 1000000) s"${rows / 1000000}M"
+      else if (rows >= 1024) s"${rows / 1024}K"
+      else rows.toString
+    }
+
+    def runSectionA(
+        aggFn: String, iters: Int, rows: Long, halfW: Int, stressMark: String): Unit = {
+      val frame = frameFor(halfW)
+      val dNaive = digest(aggFn, frame)
+      val dSeg = digest(aggFn, frame, SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true")
+      val dSegBs = digest(aggFn, frame,
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        SQLConf.WINDOW_SEGMENT_TREE_BLOCK_SIZE.key -> "256")
+      require(dNaive == dSeg,
+        s"$aggFn segtree digest mismatch: naive=$dNaive seg=$dSeg")
+      require(dNaive == dSegBs,
+        s"$aggFn segtree (bs=256) digest mismatch: naive=$dNaive seg=$dSegBs")
+
+      val W = 2 * halfW + 1
+      val benchmark = new Benchmark(
+        s"$aggFn sliding window, W=$W, ${rowsLabel(rows)} rows$stressMark",
+        rows, output = output)
+      val nNaive = s"$aggFn naive (current, baseline)"
+      val nSeg = s"$aggFn segtree (default)"
+      val nSegBs = s"$aggFn segtree (blockSize=256)"
+      allCaseNames ++= Seq(nNaive, nSeg, nSegBs)
+
+      benchmark.addCase(nNaive, numIters = iters) { _ =>
+        currentCase = nNaive
+        spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+      }
+      benchmark.addCase(nSeg, numIters = iters) { _ =>
+        currentCase = nSeg
+        withSQLConf(SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true") {
+          spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+        }
+      }
+      benchmark.addCase(nSegBs, numIters = iters) { _ =>
+        currentCase = nSegBs
+        withSQLConf(
+          SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+          SQLConf.WINDOW_SEGMENT_TREE_BLOCK_SIZE.key -> "256") {
+          spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+        }
+      }
+      benchmark.run()
+    }
+
+    def runSectionB(
+        halfW: Int, stressBs: Boolean, rows: Long, iters: Int, stressMark: String): Unit = {
+      val aggFn = "SUM"
+      val frame = frameFor(halfW)
+      val W = 2 * halfW + 1
+      val dNaive = digest(aggFn, frame)
+      val dSeg = digest(aggFn, frame, SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true")
+      require(dNaive == dSeg, s"Section B W=$W digest mismatch: naive=$dNaive seg=$dSeg")
+
+      val benchmark = new Benchmark(
+        s"$aggFn scaling, W=$W, ${rowsLabel(rows)} rows$stressMark", rows, output = output)
+      val nNaive = s"$aggFn naive W=$W"
+      val nSeg = s"$aggFn segtree (default) W=$W"
+      allCaseNames ++= Seq(nNaive, nSeg)
+      benchmark.addCase(nNaive, numIters = iters) { _ =>
+        currentCase = nNaive
+        spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+      }
+      benchmark.addCase(nSeg, numIters = iters) { _ =>
+        currentCase = nSeg
+        withSQLConf(SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true") {
+          spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+        }
+      }
+      if (stressBs) {
+        val dSegBs = digest(aggFn, frame,
+          SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+          SQLConf.WINDOW_SEGMENT_TREE_BLOCK_SIZE.key -> "256")
+        require(dNaive == dSegBs,
+          s"Section B W=$W bs=256 digest mismatch: naive=$dNaive segBs=$dSegBs")
+        val nSegBs = s"$aggFn segtree (blockSize=256) W=$W"
+        allCaseNames += nSegBs
+        benchmark.addCase(nSegBs, numIters = iters) { _ =>
+          currentCase = nSegBs
+          withSQLConf(
+            SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+            SQLConf.WINDOW_SEGMENT_TREE_BLOCK_SIZE.key -> "256") {
+            spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+          }
+        }
+      }
+      benchmark.run()
+    }
+
+    def runSpillGuard(): Unit = {
+      val halfW = 500
+      val frame = frameFor(halfW)
+      // Digest parity skipped: pre-check scan on 1M String x W=1001 naive costs
+      // ~90s; correctness covered by SegmentTreeWindowFunctionSuite.
+      val benchmark = new Benchmark(
+        "MAX String spill guard, W=1001, 1M rows (stress)", SPILL_N, output = output)
+      val nNaive = "MAX naive (String)"
+      val nSeg = "MAX segtree default (String)"
+      allCaseNames ++= Seq(nNaive, nSeg)
+      benchmark.addCase(nNaive, numIters = ITERS_STRESS) { _ =>
+        currentCase = nNaive
+        spark.sql(s"SELECT MAX(v) $frame FROM t").noop()
+      }
+      benchmark.addCase(nSeg, numIters = ITERS_STRESS) { _ =>
+        currentCase = nSeg
+        withSQLConf(SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true") {
+          spark.sql(s"SELECT MAX(v) $frame FROM t").noop()
+        }
+      }
+      benchmark.run()
+    }
+
+    // Section C: N-sweep, segtree-only. Digest check skipped (Section A SUM @ N=2M
+    // covers the same code path). Naive skipped at 16M (~4min/iter, no value).
+    // Goal: memory-pressure invariance - per-row ns at 16M should be <= 2x at 2M.
+    def runSectionC(rows: Long): Unit = {
+      val aggFn = "SUM"
+      val frame = frameFor(C_HALF_W)
+      val W = 2 * C_HALF_W + 1
+      val benchmark = new Benchmark(
+        s"$aggFn N-sweep (segtree-only), W=$W, ${rowsLabel(rows)} rows (stress)",
+        rows, output = output)
+      val nSeg = s"$aggFn segtree (default) N=${rowsLabel(rows)}"
+      allCaseNames += nSeg
+      benchmark.addCase(nSeg, numIters = ITERS_STRESS) { _ =>
+        currentCase = nSeg
+        withSQLConf(SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true") {
+          spark.sql(s"SELECT $aggFn(v) $frame FROM t").noop()
+        }
+      }
+      benchmark.run()
+    }
+
+    try {
+      if (smokeMode) {
+        setupIntTable(smokeRowCount)
+        runBenchmark("SMOKE: Section A MIN") {
+          runSectionA("MIN", ITERS_STRESS, smokeRowCount, smokeHalfW, "")
+        }
+        runBenchmark("SMOKE: Section B SUM W sweep point") {
+          runSectionB(
+            smokeHalfW, stressBs = smokeHalfW >= 2000, smokeRowCount, ITERS_STRESS, "")
+        }
+      } else {
+        setupIntTable(A_N_INT)
+        runBenchmark("Section A - MIN (non-invertible)") {
+          runSectionA("MIN", ITERS_NORMAL, A_N_INT, MAIN_HALF_W, "")
+        }
+        runBenchmark("Section A - MAX (non-invertible)") {
+          runSectionA("MAX", ITERS_NORMAL, A_N_INT, MAIN_HALF_W, "")
+        }
+        runBenchmark("Section A - SUM (Spark has no inverse; full recompute)") {
+          runSectionA("SUM", ITERS_NORMAL, A_N_INT, MAIN_HALF_W, "")
+        }
+        runBenchmark("Section A - COUNT") {
+          runSectionA("COUNT", ITERS_NORMAL, A_N_INT, MAIN_HALF_W, "")
+        }
+
+        setupIntTable(A_N_AVG)
+        runBenchmark("Section A - AVG (multi-buffer)") {
+          runSectionA("AVG", ITERS_NORMAL, A_N_AVG, MAIN_HALF_W, "")
+        }
+
+        setupIntTable(A_N_STDDEV)
+        runBenchmark("Section A - STDDEV_SAMP (multi-buffer, stress)") {
+          runSectionA("STDDEV_SAMP", ITERS_STRESS, A_N_STDDEV, MAIN_HALF_W, " (stress)")
+        }
+
+        setupIntTable(B_N_W10)
+        runBenchmark("Section B - W=10 scaling (stress: Pareto loss zone)") {
+          runSectionB(5, stressBs = false, B_N_W10, ITERS_STRESS, " (stress)")
+        }
+        setupIntTable(B_N_W50)
+        runBenchmark("Section B - W=50 scaling (stress: Pareto loss zone)") {
+          runSectionB(25, stressBs = false, B_N_W50, ITERS_STRESS, " (stress)")
+        }
+        setupIntTable(B_N_W201)
+        runBenchmark("Section B - W=201 scaling") {
+          runSectionB(100, stressBs = false, B_N_W201, ITERS_NORMAL, "")
+        }
+        setupIntTable(B_N_W4001)
+        runBenchmark("Section B - W=4001 scaling (stress, + bs=256 cross-block)") {
+          runSectionB(2000, stressBs = true, B_N_W4001, ITERS_STRESS, " (stress)")
+        }
+
+        setupStringTable(SPILL_N)
+        runBenchmark("Section F - spill regression guard (String, stress)") {
+          runSpillGuard()
+        }
+
+        setupIntTable(C_N_SMALL)
+        runBenchmark("Section C - N-sweep small (stress)") {
+          runSectionC(C_N_SMALL)
+        }
+        setupIntTable(C_N_MID)
+        runBenchmark("Section C - N-sweep mid (stress)") {
+          runSectionC(C_N_MID)
+        }
+        setupIntTable(C_N_LARGE)
+        runBenchmark("Section C - N-sweep large (stress)") {
+          runSectionC(C_N_LARGE)
+        }
+      }
+
+      // Drain listener before reading metrics.
+      spark.sparkContext.listenerBus.waitUntilEmpty()
+
+      // scalastyle:off println
+      val out = System.out
+      out.println()
+      out.println("Memory/Spill (peak executor memory / total disk spilled):")
+      if (allCaseNames.nonEmpty) {
+        val width = allCaseNames.map(_.length).max + 2
+        for (name <- allCaseNames) {
+          val (pm, ds) = metrics.synchronized {
+            metrics.getOrElse(name, (0L, 0L))
+          }
+          val peakMb = pm.toDouble / (1024.0 * 1024.0)
+          val label = (name + ":").padTo(width, ' ')
+          out.println(f"  $label%s peak=$peakMb%8.2f MB   spilled=$ds%d B")
+        }
+      }
+      out.println()
+      // scalastyle:on println
+    } finally {
+      spark.sparkContext.removeSparkListener(listener)
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala
index 14cf72c78dbee..c88ebb0d69ee7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlignAssignmentsSuiteBase.scala
@@ -188,6 +188,16 @@ abstract class AlignAssignmentsSuiteBase extends AnalysisTest {
     when(manager.v1SessionCatalog).thenReturn(v1SessionCatalog)
     when(manager.v2SessionCatalog).thenReturn(v2SessionCatalog)
     when(manager.tempVariableManager).thenReturn(tempVariableManager)
+    when(manager.sessionPathEntries).thenReturn(None)
+    val defaultPath = SQLConf.get.resolutionSearchPath(Seq(v2Catalog.name()))
+    when(manager.sqlResolutionPathEntries(
+      any[String], any[Seq[String]], any[String], any[Seq[String]]))
+      .thenReturn(defaultPath)
+    when(manager.sqlResolutionPathEntries(any[String], any[Seq[String]]))
+      .thenReturn(defaultPath)
+    when(manager.resolutionPathEntriesForAnalysis(
+      any[Option[Seq[Seq[String]]]], any[Seq[String]]))
+      .thenReturn(defaultPath)
     manager
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewAsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewAsSuiteBase.scala
new file mode 100644
index 0000000000000..1250ce84af1dd
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewAsSuiteBase.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import java.util.Locale
+
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+
+/**
+ * Unified tests for `ALTER VIEW ... AS` against V1 (session) and V2 view catalogs.
+ */
+trait AlterViewAsSuiteBase extends QueryTest with DDLCommandTestUtils {
+  import testImplicits._
+  override val command: String = "ALTER VIEW ... AS"
+
+  protected def namespace: String = "default"
+
+  protected def withSourceTable(values: Int*)(body: => Unit): Unit = {
+    withTable("spark_catalog.default.alter_src") {
+      values.toSeq.toDF("x").write.saveAsTable("spark_catalog.default.alter_src")
+      body
+    }
+  }
+
+  test("ALTER VIEW updates the body of an existing view") {
+    val view = s"$catalog.$namespace.v_alter_body"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view AS " +
+        s"SELECT x FROM spark_catalog.default.alter_src WHERE x > 10")
+      checkAnswer(spark.table(view), Seq.empty[Row])
+      sql(s"ALTER VIEW $view AS " +
+        s"SELECT x FROM spark_catalog.default.alter_src WHERE x > 1")
+      checkAnswer(spark.table(view), Seq(Row(2), Row(3)))
+    }
+  }
+
+  test("ALTER VIEW on a missing view fails at analysis") {
+    val view = s"$catalog.$namespace.v_alter_missing"
+    intercept[AnalysisException] {
+      sql(s"ALTER VIEW $view AS SELECT 1 AS x")
+    }
+  }
+
+  test("ALTER VIEW rejects reference to a temporary function") {
+    val view = s"$catalog.$namespace.v_alter_tempfn"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.alter_src")
+      spark.udf.register("temp_udf_alter", (i: Int) => i + 1)
+      val ex = intercept[AnalysisException] {
+        sql(s"ALTER VIEW $view AS " +
+          s"SELECT temp_udf_alter(x) FROM spark_catalog.default.alter_src")
+      }
+      assert(ex.getMessage.toLowerCase(Locale.ROOT).contains("temporary"))
+    }
+  }
+
+  test("ALTER VIEW rejects reference to a temporary view") {
+    val view = s"$catalog.$namespace.v_alter_tempview"
+    withSourceTable(1) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.alter_src")
+      withTempView("tv_alter") {
+        spark.range(3).createOrReplaceTempView("tv_alter")
+        val ex = intercept[AnalysisException] {
+          sql(s"ALTER VIEW $view AS SELECT id AS x FROM tv_alter")
+        }
+        assert(ex.getMessage.toLowerCase(Locale.ROOT).contains("temporary"))
+      }
+    }
+  }
+
+  test("ALTER VIEW rejects reference to a temporary variable") {
+    val view = s"$catalog.$namespace.v_alter_tempvar"
+    withSourceTable(1) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.alter_src")
+      sql("DECLARE OR REPLACE VARIABLE temp_var_alter INT DEFAULT 1")
+      try {
+        val ex = intercept[AnalysisException] {
+          sql(s"ALTER VIEW $view AS SELECT temp_var_alter AS x")
+        }
+        assert(ex.getMessage.toLowerCase(Locale.ROOT).contains("temporary"))
+      } finally {
+        sql("DROP TEMPORARY VARIABLE IF EXISTS temp_var_alter")
+      }
+    }
+  }
+
+  test("ALTER VIEW preserves user-set TBLPROPERTIES") {
+    val view = s"$catalog.$namespace.v_alter_keep_props"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view " +
+        s"TBLPROPERTIES ('key1' = 'val1') AS " +
+        s"SELECT x FROM spark_catalog.default.alter_src")
+      sql(s"ALTER VIEW $view SET TBLPROPERTIES ('key2' = 'val2')")
+      sql(s"ALTER VIEW $view AS SELECT x + 1 AS x FROM spark_catalog.default.alter_src")
+      val rows = sql(s"SHOW TBLPROPERTIES $view").collect()
+      val pairs = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+      assert(pairs.get("key1").contains("val1"))
+      assert(pairs.get("key2").contains("val2"))
+    }
+  }
+
+  test("ALTER VIEW preserves SCHEMA EVOLUTION binding mode") {
+    val view = s"$catalog.$namespace.v_alter_keep_schema_mode"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view WITH SCHEMA EVOLUTION AS " +
+        s"SELECT x FROM spark_catalog.default.alter_src")
+      sql(s"ALTER VIEW $view AS SELECT x + 1 AS x FROM spark_catalog.default.alter_src")
+      val ddl = sql(s"SHOW CREATE TABLE $view").collect().head.getString(0)
+      assert(ddl.contains("WITH SCHEMA EVOLUTION"),
+        s"schema-binding mode lost across ALTER VIEW AS:\n$ddl")
+    }
+  }
+
+  test("CREATE OR REPLACE VIEW with a body referencing a missing table fails") {
+    val view = s"$catalog.$namespace.v_alter_bad_body"
+    withSourceTable(1) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.alter_src")
+      intercept[AnalysisException] {
+        sql(s"CREATE OR REPLACE VIEW $view AS SELECT x FROM does_not_exist_at_all")
+      }
+    }
+  }
+
+  test("ALTER VIEW detects a direct cyclic reference") {
+    val a = s"$catalog.$namespace.v_alter_cycle_a"
+    val b = s"$catalog.$namespace.v_alter_cycle_b"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $a AS SELECT x FROM spark_catalog.default.alter_src")
+      sql(s"CREATE VIEW $b AS SELECT x FROM $a")
+      val ex = intercept[AnalysisException] {
+        sql(s"ALTER VIEW $a AS SELECT x FROM $b")
+      }
+      assert(ex.getCondition == "RECURSIVE_VIEW")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewRenameSuiteBase.scala
new file mode 100644
index 0000000000000..1620425c2abd1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewRenameSuiteBase.scala
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+
+/**
+ * Unified tests for `ALTER VIEW ... RENAME TO` against V1 (session) and V2 view catalogs.
+ */
+trait AlterViewRenameSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "ALTER VIEW ... RENAME TO"
+
+  protected def namespace: String = "default"
+
+  protected def createView(view: String): Unit = {
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+  }
+
+  protected def viewExists(qualified: String): Boolean = {
+    val parts = qualified.split('.').toSeq
+    val nsAndView = parts.tail
+    val ns = nsAndView.init.mkString(".")
+    val name = nsAndView.last
+    sql(s"SHOW VIEWS IN $catalog.$ns").collect().exists(_.getString(1) == name)
+  }
+
+  test("rename moves the entry") {
+    val src = s"$catalog.$namespace.v_rename_src"
+    val dstName = "v_rename_dst"
+    createView(src)
+    // v1 `AlterTableRenameCommand` requires a 1- or 2-part target identifier; a single name
+    // renames in the same namespace. v2 accepts either form. Use the unqualified form so the
+    // base test runs against both paths.
+    sql(s"ALTER VIEW $src RENAME TO $dstName")
+    assert(!viewExists(src), s"$src should be gone after rename")
+    assert(viewExists(s"$catalog.$namespace.$dstName"),
+      s"$catalog.$namespace.$dstName should exist after rename")
+  }
+
+  test("rename preserves the view body") {
+    val src = s"$catalog.$namespace.v_rename_body_src"
+    val dstName = "v_rename_body_dst"
+    sql(s"CREATE VIEW $src AS SELECT 7 AS answer")
+    sql(s"ALTER VIEW $src RENAME TO $dstName")
+    checkAnswer(sql(s"SELECT * FROM $catalog.$namespace.$dstName"), Row(7))
+  }
+
+  test("renaming to an existing name fails") {
+    val src = s"$catalog.$namespace.v_rename_collide_src"
+    val dst = s"$catalog.$namespace.v_rename_collide_dst"
+    createView(src)
+    createView(dst)
+    intercept[AnalysisException] {
+      sql(s"ALTER VIEW $src RENAME TO v_rename_collide_dst")
+    }
+  }
+
+  test("ALTER TABLE syntax on a view is rejected (use ALTER VIEW)") {
+    // `ALTER TABLE x RENAME TO y` and `ALTER VIEW x RENAME TO y` use the same parser entry
+    // (`UnresolvedTableOrView` + `isView` flag); when the resolved child is a view but the
+    // syntax says TABLE, error with EXPECT_TABLE_NOT_VIEW.USE_ALTER_VIEW. v1 enforces this in
+    // `DDLUtils.verifyAlterTableType`; v2 enforces it in DataSourceV2Strategy.
+    val view = s"$catalog.$namespace.v_rename_wrong_syntax"
+    createView(view)
+    val ex = intercept[AnalysisException] {
+      sql(s"ALTER TABLE $view RENAME TO v_rename_wrong_syntax_dst")
+    }
+    assert(ex.getCondition.startsWith("EXPECT_TABLE_NOT_VIEW"),
+      s"unexpected error condition: ${ex.getCondition}")
+  }
+
+  test("rename re-caches a previously cached view") {
+    // v1 `AlterTableRenameCommand` and v2 `RenameTableExec` both capture the cached storage
+    // level before rename and re-instate it on the new identifier afterwards. The v2 view
+    // path (`RenameV2ViewExec`) follows the same pattern -- without it, a user-cached view
+    // would silently lose its cache entry after RENAME.
+    val src = s"$catalog.$namespace.v_rename_cached_src"
+    val dstName = "v_rename_cached_dst"
+    val dst = s"$catalog.$namespace.$dstName"
+    createView(src)
+    spark.catalog.cacheTable(src)
+    assert(spark.catalog.isCached(src), "bad test: view was not cached in the first place")
+    try {
+      sql(s"ALTER VIEW $src RENAME TO $dstName")
+      // After rename, the destination's plan must still be cached. Resolving the old name
+      // post-rename throws TABLE_OR_VIEW_NOT_FOUND, so we only check the destination side.
+      assert(spark.catalog.isCached(dst),
+        s"$dst should still be cached after RENAME")
+    } finally {
+      spark.catalog.uncacheTable(dst)
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewSchemaBindingSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewSchemaBindingSuiteBase.scala
new file mode 100644
index 0000000000000..25bbe73e5b765
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewSchemaBindingSuiteBase.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `ALTER VIEW ... WITH SCHEMA` against V1 (session) and V2 view catalogs.
+ */
+trait AlterViewSchemaBindingSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "ALTER VIEW ... WITH SCHEMA"
+
+  protected def namespace: String = "default"
+
+  protected def createView(view: String): Unit = {
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+  }
+
+  protected def schemaModeOf(view: String): String = {
+    val rows = sql(s"SHOW CREATE TABLE $view").collect()
+    val ddl = rows.head.getString(0)
+    // Extract the WITH SCHEMA <mode> clause if present.
+    val pattern = """WITH SCHEMA\s+(BINDING|COMPENSATION|EVOLUTION|TYPE EVOLUTION)""".r
+    pattern.findFirstMatchIn(ddl).map(_.group(1)).getOrElse("BINDING")
+  }
+
+  test("set EVOLUTION") {
+    val view = s"$catalog.$namespace.v_schema_evolve"
+    createView(view)
+    sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+    assert(schemaModeOf(view) == "EVOLUTION")
+  }
+
+  test("set COMPENSATION") {
+    val view = s"$catalog.$namespace.v_schema_compensate"
+    createView(view)
+    sql(s"ALTER VIEW $view WITH SCHEMA COMPENSATION")
+    assert(schemaModeOf(view) == "COMPENSATION")
+  }
+
+  test("set BINDING (default)") {
+    val view = s"$catalog.$namespace.v_schema_binding"
+    createView(view)
+    sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+    sql(s"ALTER VIEW $view WITH SCHEMA BINDING")
+    assert(schemaModeOf(view) == "BINDING")
+  }
+
+  test("WITH SCHEMA does not change the view body") {
+    val view = s"$catalog.$namespace.v_schema_body_intact"
+    sql(s"CREATE VIEW $view AS SELECT 7 AS x")
+    sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+    checkAnswer(sql(s"SELECT * FROM $view"), org.apache.spark.sql.Row(7))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewSetTblPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewSetTblPropertiesSuiteBase.scala
new file mode 100644
index 0000000000000..b55ee4563c6eb
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewSetTblPropertiesSuiteBase.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{AnalysisException, QueryTest}
+
+/**
+ * This base suite contains unified tests for the `ALTER VIEW ... SET TBLPROPERTIES` command
+ * that check V1 (session) and V2 view catalogs:
+ *
+ *   - V2 view catalog: `org.apache.spark.sql.execution.command.v2.AlterViewSetTblPropertiesSuite`
+ *   - V1 (session) view catalog:
+ *     `org.apache.spark.sql.execution.command.v1.AlterViewSetTblPropertiesSuite`
+ */
+trait AlterViewSetTblPropertiesSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "ALTER VIEW ... SET TBLPROPERTIES"
+
+  protected def namespace: String = "default"
+
+  protected def createView(view: String): Unit = {
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+  }
+
+  /** Reads back the property value visible via SHOW TBLPROPERTIES, or `None` if absent. */
+  protected def lookupProperty(view: String, key: String): Option[String] = {
+    // SHOW TBLPROPERTIES <view> ('key') always returns a single row; on a missing key the row
+    // carries a "does not have property" placeholder. Iterate the full property listing
+    // instead so absence is unambiguous.
+    sql(s"SHOW TBLPROPERTIES $view").collect()
+      .find(_.getString(0) == key)
+      .map(_.getString(1))
+  }
+
+  test("set a single property") {
+    val view = s"$catalog.$namespace.v_set_one"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v')")
+    assert(lookupProperty(view, "k").contains("v"))
+  }
+
+  test("set multiple properties at once") {
+    val view = s"$catalog.$namespace.v_set_many"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('a' = '1', 'b' = '2')")
+    assert(lookupProperty(view, "a").contains("1"))
+    assert(lookupProperty(view, "b").contains("2"))
+  }
+
+  test("setting overwrites existing property") {
+    val view = s"$catalog.$namespace.v_set_overwrite"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v1')")
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v2')")
+    assert(lookupProperty(view, "k").contains("v2"))
+  }
+
+  test("missing view raises a clean analysis error") {
+    val view = s"$catalog.$namespace.v_missing_set"
+    val ex = intercept[AnalysisException] {
+      sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v')")
+    }
+    // Both v1 and v2 paths surface a TABLE_OR_VIEW_NOT_FOUND-shaped error here; the exact
+    // condition string can differ slightly between paths, so just assert non-empty message.
+    assert(ex.getMessage.contains(view.split('.').last))
+  }
+
+  test("show TBLPROPERTIES reflects the set property") {
+    val view = s"$catalog.$namespace.v_show_after_set"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v')")
+    val rows = sql(s"SHOW TBLPROPERTIES $view").collect()
+    assert(rows.exists(r => r.getString(0) == "k" && r.getString(1) == "v"),
+      s"property k=v missing from SHOW TBLPROPERTIES: ${rows.mkString(", ")}")
+  }
+
+  test("read-after-write returns user-set value") {
+    val view = s"$catalog.$namespace.v_read_after_set"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('answer' = '42')")
+    assert(lookupProperty(view, "answer").contains("42"))
+    val all = sql(s"SHOW TBLPROPERTIES $view").collect()
+    assert(all.length >= 1, s"expected at least one property row, got: ${all.mkString(", ")}")
+  }
+
+  test("setting `comment` flows through to SHOW CREATE TABLE") {
+    // v1 `AlterTableSetPropertiesCommand` updates the typed `CatalogTable.comment` field when
+    // the user passes `'comment'` via SET TBLPROPERTIES, so SHOW CREATE TABLE renders the
+    // comment in the COMMENT clause. The v2 path uses `ViewInfo.properties` as the source of
+    // truth for `PROP_COMMENT` (see `AlterV2ViewSetPropertiesExec` and `ShowCreateV2ViewExec`),
+    // so the same SET TBLPROPERTIES('comment' = ...) round-trips through SHOW CREATE TABLE.
+    // Pin the cross-catalog parity here.
+    val view = s"$catalog.$namespace.v_set_comment"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('comment' = 'a view comment')")
+    val ddl = sql(s"SHOW CREATE TABLE $view").collect().head.getString(0)
+    assert(ddl.contains("a view comment"),
+      s"comment did not flow through to SHOW CREATE TABLE: $ddl")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewUnsetTblPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewUnsetTblPropertiesSuiteBase.scala
new file mode 100644
index 0000000000000..806e2aaa254fe
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterViewUnsetTblPropertiesSuiteBase.scala
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `ALTER VIEW ... UNSET TBLPROPERTIES` against V1 (session) and V2 view
+ * catalogs.
+ */
+trait AlterViewUnsetTblPropertiesSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "ALTER VIEW ... UNSET TBLPROPERTIES"
+
+  protected def namespace: String = "default"
+
+  protected def createViewWithProps(view: String, propPairs: (String, String)*): Unit = {
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    if (propPairs.nonEmpty) {
+      val props = propPairs.map { case (k, v) => s"'$k' = '$v'" }.mkString(", ")
+      sql(s"ALTER VIEW $view SET TBLPROPERTIES ($props)")
+    }
+  }
+
+  protected def lookupProperty(view: String, key: String): Option[String] = {
+    sql(s"SHOW TBLPROPERTIES $view").collect()
+      .find(_.getString(0) == key)
+      .map(_.getString(1))
+  }
+
+  test("unset a single property") {
+    val view = s"$catalog.$namespace.v_unset_one"
+    createViewWithProps(view, "k" -> "v")
+    sql(s"ALTER VIEW $view UNSET TBLPROPERTIES ('k')")
+    assert(lookupProperty(view, "k").isEmpty)
+  }
+
+  test("unset preserves other properties") {
+    val view = s"$catalog.$namespace.v_unset_keeps_others"
+    createViewWithProps(view, "k" -> "v", "other" -> "stay")
+    sql(s"ALTER VIEW $view UNSET TBLPROPERTIES ('k')")
+    assert(lookupProperty(view, "k").isEmpty)
+    assert(lookupProperty(view, "other").contains("stay"))
+  }
+
+  test("unset multiple keys at once") {
+    val view = s"$catalog.$namespace.v_unset_many"
+    createViewWithProps(view, "a" -> "1", "b" -> "2", "c" -> "3")
+    sql(s"ALTER VIEW $view UNSET TBLPROPERTIES ('a', 'b')")
+    assert(lookupProperty(view, "a").isEmpty)
+    assert(lookupProperty(view, "b").isEmpty)
+    assert(lookupProperty(view, "c").contains("3"))
+  }
+
+  test("unset with IF EXISTS on a missing key is a no-op") {
+    val view = s"$catalog.$namespace.v_unset_if_exists"
+    createViewWithProps(view, "k" -> "v")
+    sql(s"ALTER VIEW $view UNSET TBLPROPERTIES IF EXISTS ('not_there')")
+    // Existing property remains.
+    assert(lookupProperty(view, "k").contains("v"))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateViewSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateViewSuiteBase.scala
new file mode 100644
index 0000000000000..d046e74b68624
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateViewSuiteBase.scala
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import java.util.Locale
+
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+
+/**
+ * Unified tests for `CREATE VIEW` against V1 (session) and V2 view catalogs. Source data lives
+ * at `spark_catalog.default.t` for both paths.
+ */
+trait CreateViewSuiteBase extends QueryTest with DDLCommandTestUtils {
+  import testImplicits._
+  override val command: String = "CREATE VIEW"
+
+  protected def namespace: String = "default"
+
+  protected def withSourceTable(values: Int*)(body: => Unit): Unit = {
+    withTable("spark_catalog.default.src") {
+      values.toSeq.toDF("x").write.saveAsTable("spark_catalog.default.src")
+      body
+    }
+  }
+
+  /**
+   * Seed a non-view table at `qualified` (full `catalog.ns.name`) and run `body`. Same SQL
+   * for v1 and v2 -- `InMemoryTableViewCatalog.createTable` accepts the parquet TableInfo
+   * the same way the session catalog does, so both legs share this implementation.
+   */
+  protected final def withSeededTable(qualified: String)(body: => Unit): Unit = {
+    withTable(qualified) {
+      sql(s"CREATE TABLE $qualified (col STRING) USING parquet")
+      body
+    }
+  }
+
+  test("CREATE VIEW persists the body and the SELECT round-trips") {
+    val view = s"$catalog.$namespace.v_create_basic"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.src WHERE x > 1")
+      checkAnswer(spark.table(view), Seq(Row(2), Row(3)))
+    }
+  }
+
+  test("CREATE VIEW IF NOT EXISTS is a no-op when the view already exists") {
+    val view = s"$catalog.$namespace.v_create_ifne"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.src")
+      // Re-running with IF NOT EXISTS must succeed without changing the body.
+      sql(s"CREATE VIEW IF NOT EXISTS $view AS " +
+        s"SELECT x + 100 AS x FROM spark_catalog.default.src")
+      checkAnswer(spark.table(view), Seq(Row(1), Row(2), Row(3)))
+    }
+  }
+
+  test("CREATE VIEW without IF NOT EXISTS fails when the view exists") {
+    val view = s"$catalog.$namespace.v_create_dup"
+    withSourceTable(1) {
+      sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.src")
+      intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view AS SELECT x FROM spark_catalog.default.src")
+      }
+    }
+  }
+
+  test("CREATE OR REPLACE VIEW replaces the body of an existing view") {
+    val view = s"$catalog.$namespace.v_create_replace"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $view AS " +
+        s"SELECT x FROM spark_catalog.default.src WHERE x > 10")
+      checkAnswer(spark.table(view), Seq.empty[Row])
+      sql(s"CREATE OR REPLACE VIEW $view AS " +
+        s"SELECT x FROM spark_catalog.default.src WHERE x > 1")
+      checkAnswer(spark.table(view), Seq(Row(2), Row(3)))
+    }
+  }
+
+  test("CREATE VIEW with a user-specified column list aliases the output") {
+    val view = s"$catalog.$namespace.v_create_cols"
+    withSourceTable(1, 2) {
+      sql(s"CREATE VIEW $view (alpha, beta) AS " +
+        s"SELECT x AS xa, (x + 1) AS xb FROM spark_catalog.default.src")
+      val cols = spark.table(view).schema.fieldNames.toSeq
+      assert(cols == Seq("alpha", "beta"))
+    }
+  }
+
+  test("CREATE VIEW rejects too-few user-specified columns") {
+    val view = s"$catalog.$namespace.v_create_few"
+    withSourceTable(1) {
+      intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view (a) AS " +
+          s"SELECT x AS xa, x AS xb FROM spark_catalog.default.src")
+      }
+    }
+  }
+
+  test("CREATE VIEW rejects too-many user-specified columns") {
+    val view = s"$catalog.$namespace.v_create_many"
+    withSourceTable(1) {
+      intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view (a, b, c) AS SELECT x FROM spark_catalog.default.src")
+      }
+    }
+  }
+
+  test("CREATE VIEW rejects reference to a temporary function") {
+    val view = s"$catalog.$namespace.v_create_tempfn"
+    withSourceTable(1, 2, 3) {
+      spark.udf.register("temp_udf_create", (i: Int) => i + 1)
+      val ex = intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view AS " +
+          s"SELECT temp_udf_create(x) FROM spark_catalog.default.src")
+      }
+      assert(ex.getMessage.toLowerCase(Locale.ROOT).contains("temporary"))
+    }
+  }
+
+  test("CREATE VIEW rejects reference to a temporary view") {
+    val view = s"$catalog.$namespace.v_create_tempview"
+    withTempView("tv_create") {
+      spark.range(3).createOrReplaceTempView("tv_create")
+      val ex = intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view AS SELECT id FROM tv_create")
+      }
+      assert(ex.getMessage.toLowerCase(Locale.ROOT).contains("temporary"))
+    }
+  }
+
+  test("CREATE VIEW rejects reference to a temporary variable") {
+    val view = s"$catalog.$namespace.v_create_tempvar"
+    sql("DECLARE OR REPLACE VARIABLE temp_var_create INT DEFAULT 1")
+    try {
+      val ex = intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view AS SELECT temp_var_create AS x")
+      }
+      assert(ex.getMessage.toLowerCase(Locale.ROOT).contains("temporary"))
+    } finally {
+      sql("DROP TEMPORARY VARIABLE IF EXISTS temp_var_create")
+    }
+  }
+
+  test("CREATE OR REPLACE VIEW detects a direct cyclic reference") {
+    val a = s"$catalog.$namespace.v_create_cycle_a"
+    val b = s"$catalog.$namespace.v_create_cycle_b"
+    withSourceTable(1, 2, 3) {
+      sql(s"CREATE VIEW $a AS SELECT x FROM spark_catalog.default.src")
+      sql(s"CREATE VIEW $b AS SELECT x FROM $a")
+      val ex = intercept[AnalysisException] {
+        sql(s"CREATE OR REPLACE VIEW $a AS SELECT x FROM $b")
+      }
+      assert(ex.getCondition == "RECURSIVE_VIEW")
+    }
+  }
+
+  test("CREATE VIEW over a non-view table entry surfaces the v1-parity errors") {
+    // v1-parity error conditions when CREATE [OR REPLACE | IF NOT EXISTS] VIEW collides
+    // with an existing non-view table at the same identifier. Running on both v1 and v2
+    // pins parity from each side -- v1 hits the conditions through the long-established
+    // session-catalog path, v2 hits them through the new `CreateV2ViewExec`.
+    val view = s"$catalog.$namespace.v_create_table_collide"
+    withSeededTable(view) {
+      // CREATE OR REPLACE VIEW must not silently destroy a non-view table.
+      val replaceEx = intercept[AnalysisException] {
+        sql(s"CREATE OR REPLACE VIEW $view AS SELECT 1 AS col")
+      }
+      assert(replaceEx.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE")
+
+      // Plain CREATE VIEW over a table surfaces TABLE_OR_VIEW_ALREADY_EXISTS.
+      val createEx = intercept[AnalysisException] {
+        sql(s"CREATE VIEW $view AS SELECT 1 AS col")
+      }
+      assert(createEx.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS")
+
+      // CREATE VIEW IF NOT EXISTS is a no-op -- the existing table is left alone.
+      sql(s"CREATE VIEW IF NOT EXISTS $view AS SELECT 1 AS col")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index d9b91946ded77..9997b6c7bb385 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -731,7 +731,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession {
       parser.parsePlan(sql).collect {
       case CreateTableLike(
           UnresolvedIdentifier(targetParts, _),
-          UnresolvedTableOrView(sourceParts, _, _),
+          UnresolvedTableOrView(sourceParts, _, _, _),
           loc, p, _, pr, e) =>
         (targetParts, sourceParts, loc, p, pr, e)
     }.head
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index b05ee5abd033a..c4715b6a37efc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1091,7 +1091,7 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
         "alternative" -> "DROP TABLE",
         "operation" -> "DROP VIEW",
         "foundType" -> "EXTERNAL",
-        "requiredType" -> "VIEW",
+        "requiredType" -> "VIEW or METRIC_VIEW",
         "objectName" -> "spark_catalog.dbx.tab1")
     )
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
index 436fa2e2389aa..ebe8eaf91d56a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
@@ -19,33 +19,40 @@ package org.apache.spark.sql.execution.command
 
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute,
-  UnresolvedPartitionSpec, UnresolvedTableOrView}
+  UnresolvedPartitionSpec, UnresolvedTableOrView, UnresolvedTableOrViewSearchPathMode}
 import org.apache.spark.sql.catalyst.plans.logical.{DescribeColumn, DescribeRelation,
   DescribeTablePartition}
 import org.apache.spark.sql.test.SharedSparkSession
 
 class DescribeTableParserSuite extends SharedSparkSession with AnalysisTest {
   private def parsePlan(statement: String) = spark.sessionState.sqlParser.parsePlan(statement)
+  private def unresolvedDescribeTable(name: String): UnresolvedTableOrView = {
+    UnresolvedTableOrView(
+      Seq(name),
+      "DESCRIBE TABLE",
+      allowTempView = true,
+      UnresolvedTableOrViewSearchPathMode.QueryLike)
+  }
 
   test("SPARK-17328: Fix NPE with EXPLAIN DESCRIBE TABLE") {
     comparePlans(parsePlan("describe t"),
       DescribeRelation(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true), isExtended = false))
+        unresolvedDescribeTable("t"), isExtended = false))
     comparePlans(parsePlan("describe table t"),
       DescribeRelation(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true), isExtended = false))
+        unresolvedDescribeTable("t"), isExtended = false))
     comparePlans(parsePlan("describe table extended t"),
       DescribeRelation(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true), isExtended = true))
+        unresolvedDescribeTable("t"), isExtended = true))
     comparePlans(parsePlan("describe table formatted t"),
       DescribeRelation(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true), isExtended = true))
+        unresolvedDescribeTable("t"), isExtended = true))
   }
 
   test("describe table with partition spec") {
     comparePlans(parsePlan("DESCRIBE TABLE t PARTITION (ds='2024-01-01')"),
       DescribeTablePartition(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedPartitionSpec(Map("ds" -> "2024-01-01")),
         isExtended = false))
   }
@@ -53,38 +60,38 @@ class DescribeTableParserSuite extends SharedSparkSession with AnalysisTest {
   test("describe table column") {
     comparePlans(parsePlan("DESCRIBE t col"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("col")),
         isExtended = false))
     comparePlans(parsePlan("DESCRIBE t `abc.xyz`"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("abc.xyz")),
         isExtended = false))
     comparePlans(parsePlan("DESCRIBE t abc.xyz"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("abc", "xyz")),
         isExtended = false))
     comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("a.b", "x.y")),
         isExtended = false))
 
     comparePlans(parsePlan("DESCRIBE TABLE t col"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("col")),
         isExtended = false))
     comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("col")),
         isExtended = true))
     comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"),
       DescribeColumn(
-        UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE", true),
+        unresolvedDescribeTable("t"),
         UnresolvedAttribute(Seq("col")),
         isExtended = true))
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeViewColumnSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeViewColumnSuiteBase.scala
new file mode 100644
index 0000000000000..5969354c443f0
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeViewColumnSuiteBase.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `DESCRIBE TABLE ... <column>` against a view, on V1 (session) and V2 view
+ * catalogs.
+ */
+trait DescribeViewColumnSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "DESCRIBE TABLE COLUMN on view"
+
+  protected def namespace: String = "default"
+
+  test("emits col_name / data_type / comment rows") {
+    val view = s"$catalog.$namespace.v_desc_col_basic"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS a")
+    val rows = sql(s"DESCRIBE TABLE $view a").collect()
+    val labels = rows.map(_.getString(0)).toSet
+    assert(labels.contains("col_name"))
+    assert(labels.contains("data_type"))
+    assert(labels.contains("comment"))
+  }
+
+  test("data_type matches the column type") {
+    val view = s"$catalog.$namespace.v_desc_col_type"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS a, 'x' AS b")
+    val rows = sql(s"DESCRIBE TABLE $view b").collect()
+    val pairs = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+    assert(pairs.get("col_name").contains("b"))
+    assert(pairs.get("data_type").contains("string"))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeViewSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeViewSuiteBase.scala
new file mode 100644
index 0000000000000..4520a9580e02c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeViewSuiteBase.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `DESCRIBE TABLE` against a view, on V1 (session) and V2 view catalogs.
+ */
+trait DescribeViewSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "DESCRIBE TABLE on view"
+
+  protected def namespace: String = "default"
+
+  test("describe emits one row per column") {
+    val view = s"$catalog.$namespace.v_describe_basic"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS a, 'x' AS b")
+    val rows = sql(s"DESCRIBE TABLE $view").collect()
+    val cols = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+    assert(cols.get("a").contains("int"))
+    assert(cols.get("b").contains("string"))
+  }
+
+  test("describe extended emits a detailed-info block for the view") {
+    val view = s"$catalog.$namespace.v_describe_extended"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val rows = sql(s"DESCRIBE TABLE EXTENDED $view").collect().map(_.getString(0))
+    // v1 and v2 paths render slightly different headers ('# Detailed Table Information' vs
+    // '# Detailed View Information'); accept either.
+    assert(
+      rows.contains("# Detailed Table Information") ||
+        rows.contains("# Detailed View Information"),
+      s"expected a detailed-info block in:\n${rows.mkString("\n")}")
+  }
+
+  test("describe extended includes Catalog and View Text rows") {
+    // Both v1 (`DescribeTableCommand` over a `CatalogTable` of type VIEW) and v2
+    // (`DescribeV2ViewExec`) emit a `Catalog` row carrying the resolved catalog name and a
+    // `View Text` row containing the view body, so users can read the actual definition out
+    // of EXTENDED rather than going to SHOW CREATE TABLE for it.
+    val view = s"$catalog.$namespace.v_describe_ext_body"
+    sql(s"CREATE VIEW $view AS SELECT 7 AS x")
+    val rows = sql(s"DESCRIBE TABLE EXTENDED $view").collect()
+    val pairs = rows.map(r => r.getString(0) -> Option(r.getString(1)).getOrElse("")).toMap
+    assert(pairs.get("Catalog").contains(catalog),
+      s"expected Catalog=$catalog in:\n$pairs")
+    assert(pairs.get("View Text").exists(_.contains("SELECT 7 AS x")),
+      s"expected View Text containing 'SELECT 7 AS x' in:\n$pairs")
+  }
+
+  test("describe extended promotes Comment and Collation to top-level rows") {
+    // v1 `CatalogTable.toJsonLinkedHashMap` and v2 `DescribeV2ViewExec` both render Comment /
+    // Collation as their own rows in the EXTENDED block, separately from the generic
+    // Properties row, so users don't have to scrape the Properties string for first-class
+    // fields.
+    val view = s"$catalog.$namespace.v_describe_first_class"
+    sql(s"CREATE VIEW $view COMMENT 'hello' DEFAULT COLLATION UTF8_LCASE AS SELECT 'a' AS x")
+    val rows = sql(s"DESCRIBE TABLE EXTENDED $view").collect().map { r =>
+      r.getString(0) -> Option(r.getString(1)).getOrElse("")
+    }.toMap
+    assert(rows.get("Comment").contains("hello"),
+      s"expected Comment=hello in:\n$rows")
+    // v1 renders the collation name verbatim (UTF8_LCASE); v2 does the same.
+    assert(rows.get("Collation").exists(_.equalsIgnoreCase("UTF8_LCASE")),
+      s"expected Collation=UTF8_LCASE in:\n$rows")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropViewSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropViewSuiteBase.scala
new file mode 100644
index 0000000000000..92a368e4155a6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropViewSuiteBase.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{AnalysisException, QueryTest}
+
+/**
+ * Unified tests for `DROP VIEW` against V1 (session) and V2 view catalogs.
+ */
+trait DropViewSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "DROP VIEW"
+
+  protected def namespace: String = "default"
+
+  protected def viewExists(qualified: String): Boolean = {
+    val parts = qualified.split('.').toSeq
+    val nsAndView = parts.tail
+    val ns = nsAndView.init.mkString(".")
+    val name = nsAndView.last
+    sql(s"SHOW VIEWS IN $catalog.$ns").collect().exists(_.getString(1) == name)
+  }
+
+  /**
+   * Seed a non-view table at `qualified` (full `catalog.ns.name`) and run `body`. Same SQL
+   * for v1 and v2 -- `InMemoryTableViewCatalog.createTable` accepts the parquet TableInfo
+   * the same way the session catalog does, so both legs share this implementation.
+   */
+  protected final def withSeededTable(qualified: String)(body: => Unit): Unit = {
+    withTable(qualified) {
+      sql(s"CREATE TABLE $qualified (col STRING) USING parquet")
+      body
+    }
+  }
+
+  test("drop existing view") {
+    val view = s"$catalog.$namespace.v_drop_basic"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    assert(viewExists(view))
+    sql(s"DROP VIEW $view")
+    assert(!viewExists(view))
+  }
+
+  test("drop missing view fails without IF EXISTS") {
+    val view = s"$catalog.$namespace.v_drop_missing"
+    intercept[AnalysisException] {
+      sql(s"DROP VIEW $view")
+    }
+  }
+
+  test("drop with IF EXISTS is a no-op when missing") {
+    sql(s"DROP VIEW IF EXISTS $catalog.$namespace.v_drop_never_existed")
+  }
+
+  test("DROP VIEW on a non-view table entry surfaces WRONG_COMMAND_FOR_OBJECT_TYPE") {
+    // Both v1 `DropTableCommand` and v2 `DropViewExec` route this case to
+    // `WRONG_COMMAND_FOR_OBJECT_TYPE`, which renders "Use DROP TABLE instead" -- giving the
+    // user the right command to retry. The `alternative` parameter on the rendered message
+    // surfaces the suggestion that subclassed `EXPECT_*` errors otherwise carry only via
+    // their subclass name.
+    val view = s"$catalog.$namespace.v_drop_table_collide"
+    withSeededTable(view) {
+      val ex = intercept[AnalysisException] {
+        sql(s"DROP VIEW $view")
+      }
+      assert(ex.getCondition == "WRONG_COMMAND_FOR_OBJECT_TYPE",
+        s"unexpected error condition: ${ex.getCondition}")
+      assert(ex.getMessage.contains("Use DROP TABLE instead"))
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
index 89fb6eca223ee..cd917a817f7f0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
@@ -139,6 +139,12 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest {
       .add("s", "string")
       .add("point", new StructType().add("x", "int").add("y", "int")))
     when(t.tableType).thenReturn(tableType)
+    // Mockito returns false for unstubbed Boolean methods, so analyzer code paths that
+    // dispatch through `CatalogTable.isViewLike` (e.g. `Analyzer.lookupTableOrView`'s v1
+    // session-catalog branch) would misclassify a mocked VIEW fixture as a table. Stub
+    // the method to compute from the just-stubbed `tableType` so any view-like type
+    // (VIEW today, METRIC_VIEW or future kinds) resolves correctly.
+    when(t.isViewLike).thenReturn(CatalogTable.isViewLike(tableType))
     when(t.provider).thenReturn(Some(provider))
     when(t.identifier).thenReturn(
       ident.asTableIdentifier.copy(catalog = Some(SESSION_CATALOG_NAME)))
@@ -146,6 +152,15 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest {
     when(t.properties).thenReturn(Map.empty)
     when(t.comment).thenReturn(None)
     when(t.collation).thenReturn(None)
+    if (tableType == CatalogTableType.VIEW) {
+      // Stub the view-only fields that resolution reads through `V1ViewInfo.builderFrom`.
+      // Mockito returns `null` for unstubbed Object methods, which would NPE the moment
+      // builderFrom calls `.getOrElse` / `.asJava` / `.toArray` on a null Option/Seq/Map.
+      when(t.viewText).thenReturn(None)
+      when(t.viewCatalogAndNamespace).thenReturn(Seq.empty)
+      when(t.viewSQLConfigs).thenReturn(Map.empty)
+      when(t.viewQueryColumnNames).thenReturn(Seq.empty)
+    }
     V1Table(t)
   }
 
@@ -221,6 +236,16 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest {
     when(manager.currentNamespace).thenReturn(Array.empty[String])
     when(manager.v1SessionCatalog).thenReturn(v1SessionCatalog)
     when(manager.tempVariableManager).thenReturn(tempVariableManager)
+    when(manager.sessionPathEntries).thenReturn(None)
+    val defaultPath = SQLConf.get.resolutionSearchPath(Seq(testCat.name()))
+    when(manager.sqlResolutionPathEntries(
+      any[String], any[Seq[String]], any[String], any[Seq[String]]))
+      .thenReturn(defaultPath)
+    when(manager.sqlResolutionPathEntries(any[String], any[Seq[String]]))
+      .thenReturn(defaultPath)
+    when(manager.resolutionPathEntriesForAnalysis(
+      any[Option[Seq[Seq[String]]]], any[Seq[String]]))
+      .thenReturn(defaultPath)
     manager
   }
 
@@ -230,6 +255,8 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest {
       invocation.getArguments()(0).asInstanceOf[String] match {
         case "testcat" =>
           testCat
+        case CatalogManager.SESSION_CATALOG_NAME =>
+          v2SessionCatalog
         case name => throw QueryExecutionErrors.catalogNotFoundError(name)
       }
     })
@@ -237,6 +264,17 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest {
     when(manager.currentNamespace).thenReturn(Array("default"))
     when(manager.v1SessionCatalog).thenReturn(v1SessionCatalog)
     when(manager.tempVariableManager).thenReturn(tempVariableManager)
+    when(manager.sessionPathEntries).thenReturn(None)
+    val defaultPath2 = SQLConf.get.resolutionSearchPath(
+      (v2SessionCatalog.name() +: Array("default")).toSeq)
+    when(manager.sqlResolutionPathEntries(
+      any[String], any[Seq[String]], any[String], any[Seq[String]]))
+      .thenReturn(defaultPath2)
+    when(manager.sqlResolutionPathEntries(any[String], any[Seq[String]]))
+      .thenReturn(defaultPath2)
+    when(manager.resolutionPathEntriesForAnalysis(
+      any[Option[Seq[Seq[String]]]], any[Seq[String]]))
+      .thenReturn(defaultPath2)
     manager
   }
 
@@ -778,8 +816,8 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest {
     }
     checkError(
       e,
-      condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION",
-      parameters = Map("catalogName" -> "`testcat`", "operation" -> "views"))
+      condition = "MISSING_CATALOG_ABILITY.VIEWS",
+      parameters = Map("plugin" -> "testcat"))
   }
 
   // ALTER VIEW view_name SET TBLPROPERTIES ('comment' = new_comment);
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateViewSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateViewSuiteBase.scala
new file mode 100644
index 0000000000000..08b10bad7f3df
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowCreateViewSuiteBase.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `SHOW CREATE TABLE` against a view, on V1 (session) and V2 view catalogs.
+ */
+trait ShowCreateViewSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "SHOW CREATE TABLE on view"
+
+  protected def namespace: String = "default"
+
+  test("emits CREATE VIEW prefix") {
+    val view = s"$catalog.$namespace.v_show_create_basic"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val ddl = sql(s"SHOW CREATE TABLE $view").collect().head.getString(0)
+    assert(ddl.startsWith("CREATE VIEW "), s"unexpected DDL: $ddl")
+    assert(ddl.contains("AS SELECT 1 AS x"), s"unexpected DDL: $ddl")
+  }
+
+  test("includes user-set TBLPROPERTIES") {
+    val view = s"$catalog.$namespace.v_show_create_props"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v')")
+    val ddl = sql(s"SHOW CREATE TABLE $view").collect().head.getString(0)
+    assert(ddl.contains("'k' = 'v'"), s"property missing in DDL: $ddl")
+  }
+
+  test("renders the column list") {
+    val view = s"$catalog.$namespace.v_show_create_cols"
+    sql(s"CREATE VIEW $view (col_alpha, col_beta) AS SELECT 1, 2")
+    val ddl = sql(s"SHOW CREATE TABLE $view").collect().head.getString(0)
+    // Use distinctive column names so `contains` checks aren't satisfied by tokens that
+    // appear elsewhere in the rendered DDL (e.g. inside `CREATE VIEW`, `AS SELECT`, etc.).
+    assert(ddl.contains("col_alpha"), s"col_alpha missing in DDL: $ddl")
+    assert(ddl.contains("col_beta"), s"col_beta missing in DDL: $ddl")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewColumnsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewColumnsSuiteBase.scala
new file mode 100644
index 0000000000000..99997118b8e37
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewColumnsSuiteBase.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{AnalysisException, QueryTest}
+
+/**
+ * Unified tests for `SHOW COLUMNS` against a view, on V1 (session) and V2 view catalogs.
+ */
+trait ShowViewColumnsSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "SHOW COLUMNS on view"
+
+  protected def namespace: String = "default"
+
+  test("returns one row per column") {
+    val view = s"$catalog.$namespace.v_show_cols_basic"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS a, 'x' AS b")
+    val cols = sql(s"SHOW COLUMNS IN $view").collect().map(_.getString(0)).toSeq
+    assert(cols == Seq("a", "b"))
+  }
+
+  test("respects user-specified column list on the view") {
+    val view = s"$catalog.$namespace.v_show_cols_aliased"
+    sql(s"CREATE VIEW $view (alpha, beta) AS SELECT 1, 2")
+    val cols = sql(s"SHOW COLUMNS IN $view").collect().map(_.getString(0)).toSeq
+    assert(cols == Seq("alpha", "beta"))
+  }
+
+  test("FROM <ns> mismatching the view's namespace is rejected") {
+    // `SHOW COLUMNS IN <view> FROM <ns>` cross-checks the view's resolved namespace against
+    // the explicit FROM namespace. Mismatch must error rather than silently ignoring FROM --
+    // v1 enforces this in `ResolveSessionCatalog`; v2 enforces it in `DataSourceV2Strategy`.
+    val view = s"$catalog.$namespace.v_show_cols_ns"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS a")
+    val ex = intercept[AnalysisException] {
+      sql(s"SHOW COLUMNS IN $view FROM other_ns")
+    }
+    assert(ex.getCondition == "SHOW_COLUMNS_WITH_CONFLICT_NAMESPACE",
+      s"unexpected error condition: ${ex.getCondition}")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewPropertiesSuiteBase.scala
new file mode 100644
index 0000000000000..873b65a42ddf0
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewPropertiesSuiteBase.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `SHOW TBLPROPERTIES` against a view, on V1 (session) and V2 view catalogs.
+ */
+trait ShowViewPropertiesSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "SHOW TBLPROPERTIES on view"
+
+  protected def namespace: String = "default"
+
+  test("returns user-set property by key") {
+    val view = s"$catalog.$namespace.v_show_props_one"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v')")
+    val rows = sql(s"SHOW TBLPROPERTIES $view ('k')").collect()
+    // SHOW TBLPROPERTIES <view> ('key') returns either (value) or (key, value) -- check the
+    // last column either way.
+    assert(rows.head.getString(rows.head.length - 1) == "v")
+  }
+
+  test("returns all user-set properties") {
+    val view = s"$catalog.$namespace.v_show_props_all"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('a' = '1', 'b' = '2')")
+    val rows = sql(s"SHOW TBLPROPERTIES $view").collect()
+    val pairs = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+    assert(pairs.get("a").contains("1"))
+    assert(pairs.get("b").contains("2"))
+  }
+
+  test("missing key returns the default not-found row") {
+    val view = s"$catalog.$namespace.v_show_props_missing_key"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val rows = sql(s"SHOW TBLPROPERTIES $view ('not_there')").collect()
+    val value = rows.head.getString(rows.head.length - 1)
+    assert(value.contains("does not have property"),
+      s"expected a not-found message, got: $value")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewsSuiteBase.scala
new file mode 100644
index 0000000000000..83bd04c2a9fb1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowViewsSuiteBase.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.QueryTest
+
+/**
+ * Unified tests for `SHOW VIEWS` against V1 (session) and V2 view catalogs.
+ */
+trait ShowViewsSuiteBase extends QueryTest with DDLCommandTestUtils {
+  override val command: String = "SHOW VIEWS"
+
+  protected def namespace: String = "default"
+
+  test("returns user-created views") {
+    sql(s"CREATE VIEW $catalog.$namespace.v_show_views_a AS SELECT 1 AS x")
+    sql(s"CREATE VIEW $catalog.$namespace.v_show_views_b AS SELECT 2 AS x")
+    val rows = sql(s"SHOW VIEWS IN $catalog.$namespace").collect()
+    val names = rows.map(_.getString(1)).toSet
+    assert(names.contains("v_show_views_a"), s"v_show_views_a missing: $names")
+    assert(names.contains("v_show_views_b"), s"v_show_views_b missing: $names")
+  }
+
+  test("LIKE pattern filters by name") {
+    sql(s"CREATE VIEW $catalog.$namespace.show_views_match AS SELECT 1 AS x")
+    sql(s"CREATE VIEW $catalog.$namespace.show_views_skip AS SELECT 1 AS x")
+    val rows = sql(s"SHOW VIEWS IN $catalog.$namespace LIKE 'show_views_match'").collect()
+    val names = rows.map(_.getString(1)).toSet
+    assert(names.contains("show_views_match"))
+    assert(!names.contains("show_views_skip"))
+  }
+
+  test("does not include non-view table entries") {
+    // SHOW VIEWS lists views and only views. Both v1 (session catalog routing through
+    // ShowTablesCommand-with-views-only) and v2 (`ShowViewsExec` routing through
+    // `ViewCatalog.listViews`) should exclude tables, and both must mark `isTemporary` as
+    // false for persistent view rows.
+    val viewName = "v_show_views_only"
+    val tableName = "t_not_in_show_views"
+    val table = s"$catalog.$namespace.$tableName"
+    sql(s"CREATE VIEW $catalog.$namespace.$viewName AS SELECT 1 AS x")
+    withTable(table) {
+      sql(s"CREATE TABLE $table (x INT) USING parquet")
+      val rows = sql(s"SHOW VIEWS IN $catalog.$namespace").collect()
+      val names = rows.map(_.getString(1)).toSet
+      assert(names.contains(viewName), s"$viewName missing from SHOW VIEWS: $names")
+      assert(!names.contains(tableName), s"non-view leaked into SHOW VIEWS: $names")
+      rows.foreach(r => assert(!r.getBoolean(2),
+        s"isTemporary must be false for persistent view rows: $r"))
+    }
+  }
+}
diff --git a/udf/worker/core/src/test/scala/org/apache/spark/udf/worker/core/WorkerAbstractionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewAsSuite.scala
similarity index 78%
rename from udf/worker/core/src/test/scala/org/apache/spark/udf/worker/core/WorkerAbstractionSuite.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewAsSuite.scala
index 42f53af07424a..c2f6851b2ae04 100644
--- a/udf/worker/core/src/test/scala/org/apache/spark/udf/worker/core/WorkerAbstractionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewAsSuite.scala
@@ -14,12 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.udf.worker.core
 
-import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
+package org.apache.spark.sql.execution.command.v1
 
-class WorkerAbstractionSuite
-    extends AnyFunSuite { // scalastyle:ignore funsuite
+import org.apache.spark.sql.execution.command
 
-  test("dummy") {}
-}
+class AlterViewAsSuite extends command.AlterViewAsSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewRenameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewRenameSuite.scala
new file mode 100644
index 0000000000000..9c2bcb2b90f4f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewRenameSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class AlterViewRenameSuite
+  extends command.AlterViewRenameSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala
new file mode 100644
index 0000000000000..6b11748565291
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.connector.catalog.CatalogManager
+import org.apache.spark.sql.execution.command
+import org.apache.spark.sql.internal.SQLConf
+
+class AlterViewSchemaBindingSuite
+  extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase {
+
+  test("ALTER VIEW ... WITH SCHEMA preserves the frozen SQL path") {
+    // `generateViewProperties(captureNewPath = false)` is the documented behavior for
+    // ALTER VIEW WITH SCHEMA: the view's body resolution path must stay pinned to the
+    // create-time PATH, not the caller's current PATH. This test creates the view under
+    // PATH=a, then runs ALTER VIEW WITH SCHEMA EVOLUTION under PATH=b, and asserts that
+    // the persisted VIEW_RESOLUTION_PATH still reflects PATH=a.
+    withSQLConf(SQLConf.PATH_ENABLED.key -> "true") {
+      val viewName = "v_path_preserved_on_alter"
+      val view = s"$catalog.$namespace.$viewName"
+      sql(s"CREATE SCHEMA IF NOT EXISTS $catalog.alter_view_path_a")
+      try {
+        sql(s"SET PATH = $catalog.alter_view_path_a, system.builtin")
+        sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+        val pathAfterCreate = spark.sessionState.catalog
+          .getTableMetadata(TableIdentifier(viewName, Some(namespace)))
+          .viewStoredResolutionPath
+          .getOrElse(fail("Expected the view to persist a frozen SQL path"))
+        val parsedCreate = CatalogManager.deserializePathEntries(pathAfterCreate)
+          .getOrElse(fail(s"Expected a valid serialized path, got: $pathAfterCreate"))
+        assert(parsedCreate.contains(Seq(catalog, "alter_view_path_a")),
+          s"Frozen path should include alter_view_path_a; got: $parsedCreate")
+
+        // Switch the live PATH to something else and run ALTER VIEW WITH SCHEMA.
+        // The captureNewPath = false code path must NOT overwrite the frozen path.
+        sql(s"SET PATH = $catalog.default, system.builtin")
+        sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+
+        val pathAfterAlter = spark.sessionState.catalog
+          .getTableMetadata(TableIdentifier(viewName, Some(namespace)))
+          .viewStoredResolutionPath
+          .getOrElse(fail("Frozen SQL path was dropped by ALTER VIEW WITH SCHEMA"))
+        assert(pathAfterAlter == pathAfterCreate,
+          s"ALTER VIEW WITH SCHEMA must preserve the frozen path. " +
+            s"Before: $pathAfterCreate; after: $pathAfterAlter")
+      } finally {
+        sql("SET PATH = DEFAULT_PATH")
+        sql(s"DROP VIEW IF EXISTS $view")
+        sql(s"DROP SCHEMA IF EXISTS $catalog.alter_view_path_a")
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSetTblPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSetTblPropertiesSuite.scala
new file mode 100644
index 0000000000000..477c7288d0182
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSetTblPropertiesSuite.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+/**
+ * The class contains tests for the `ALTER VIEW ... SET TBLPROPERTIES` command on V1
+ * (session-catalog) views.
+ */
+class AlterViewSetTblPropertiesSuite
+  extends command.AlterViewSetTblPropertiesSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewUnsetTblPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewUnsetTblPropertiesSuite.scala
new file mode 100644
index 0000000000000..a1caab3de1c6c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewUnsetTblPropertiesSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class AlterViewUnsetTblPropertiesSuite
+  extends command.AlterViewUnsetTblPropertiesSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CreateViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CreateViewSuite.scala
new file mode 100644
index 0000000000000..383fbdb2eca41
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CreateViewSuite.scala
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class CreateViewSuite extends command.CreateViewSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeViewColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeViewColumnSuite.scala
new file mode 100644
index 0000000000000..ce72f5e839d80
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeViewColumnSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class DescribeViewColumnSuite
+  extends command.DescribeViewColumnSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeViewSuite.scala
new file mode 100644
index 0000000000000..3d9ed0bd7b486
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeViewSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class DescribeViewSuite
+  extends command.DescribeViewSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropViewSuite.scala
new file mode 100644
index 0000000000000..c8805d67391bb
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropViewSuite.scala
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class DropViewSuite extends command.DropViewSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateViewSuite.scala
new file mode 100644
index 0000000000000..17b5009403c15
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateViewSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class ShowCreateViewSuite
+  extends command.ShowCreateViewSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewColumnsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewColumnsSuite.scala
new file mode 100644
index 0000000000000..960092f452385
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewColumnsSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class ShowViewColumnsSuite
+  extends command.ShowViewColumnsSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewPropertiesSuite.scala
new file mode 100644
index 0000000000000..e76dc5e6f7cc6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewPropertiesSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class ShowViewPropertiesSuite
+  extends command.ShowViewPropertiesSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewsSuite.scala
new file mode 100644
index 0000000000000..53d218cda3f4d
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowViewsSuite.scala
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+import org.apache.spark.sql.execution.command
+
+class ShowViewsSuite extends command.ShowViewsSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ViewCommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ViewCommandSuiteBase.scala
new file mode 100644
index 0000000000000..4997052880796
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ViewCommandSuiteBase.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v1
+
+/**
+ * Settings for v1 view command test suites. The session catalog hosts views natively; reuse
+ * v1 [[CommandSuiteBase]] so view tests inherit its `checkLocation` and other helpers.
+ */
+trait ViewCommandSuiteBase extends CommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
new file mode 100644
index 0000000000000..a3a023ec4d5c3
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewAsSuite.scala
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connector.catalog.{BasicInMemoryTableCatalog, TableCatalog}
+import org.apache.spark.sql.execution.command
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.StringType
+
+class AlterViewAsSuite extends command.AlterViewAsSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: ALTER NAMESPACE DEFAULT COLLATION does not retroactively change a view's collation") {
+    // A view created in a namespace without a default collation keeps its creation-time
+    // collation behavior. A later `ALTER NAMESPACE ... DEFAULT COLLATION` followed by
+    // `ALTER VIEW AS` does not fold the new namespace default into the view -- `PROP_COLLATION`
+    // stays at its create-time value (empty here) and the body literals stay UTF8_BINARY.
+    withSQLConf(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED.key -> "true") {
+      val viewName = "v2_alter_collation_inherit"
+      val view = s"$catalog.$namespace.$viewName"
+      sql(s"CREATE VIEW $view AS SELECT 'a' AS c1")
+      assert(Option(viewCatalog
+        .getStoredView(Array(namespace), viewName)
+        .properties()
+        .get(TableCatalog.PROP_COLLATION))
+        .isEmpty)
+
+      sql(s"ALTER NAMESPACE $catalog.$namespace DEFAULT COLLATION UTF8_LCASE")
+      sql(s"ALTER VIEW $view AS SELECT 'x' AS c1, 'y' AS c2")
+
+      val stored = viewCatalog.getStoredView(Array(namespace), viewName)
+      assert(Option(stored.properties().get(TableCatalog.PROP_COLLATION)).isEmpty)
+      val df = spark.table(view)
+      assert(df.schema("c1").dataType === StringType)
+      assert(df.schema("c2").dataType === StringType)
+    }
+  }
+
+  test("V2: ALTER VIEW after ALTER NAMESPACE DEFAULT COLLATION keeps the inherited collation") {
+    withSQLConf(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED.key -> "true") {
+      val ns = "ns"
+      val viewName = "v2"
+      val view = s"$catalog.$ns.$viewName"
+      withNamespace(s"$catalog.$ns") {
+        sql(s"CREATE NAMESPACE $catalog.$ns DEFAULT COLLATION UTF8_LCASE")
+        sql(s"CREATE VIEW $view AS SELECT 'a' AS c1")
+        assert(viewCatalog
+          .getStoredView(Array(ns), viewName)
+          .properties()
+          .get(TableCatalog.PROP_COLLATION) == "UTF8_LCASE")
+
+        sql(s"ALTER NAMESPACE $catalog.$ns DEFAULT COLLATION UNICODE")
+        sql(s"ALTER VIEW $view AS SELECT 'x' AS c1, 'y' AS c2")
+
+        val stored = viewCatalog.getStoredView(Array(ns), viewName)
+        assert(stored.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_LCASE")
+        val df = spark.table(view)
+        assert(df.schema("c1").dataType === StringType("UTF8_LCASE"))
+        assert(df.schema("c2").dataType === StringType("UTF8_LCASE"))
+      }
+    }
+  }
+
+  test("V2: ALTER VIEW preserves PROP_OWNER (v1-parity)") {
+    val view = s"$catalog.$namespace.v2_alter_keep_owner"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val ownerBefore = viewCatalog.getStoredView(Array(namespace), "v2_alter_keep_owner")
+      .properties().get(TableCatalog.PROP_OWNER)
+    sql(s"ALTER VIEW $view AS SELECT 2 AS x")
+    val ownerAfter = viewCatalog.getStoredView(Array(namespace), "v2_alter_keep_owner")
+      .properties().get(TableCatalog.PROP_OWNER)
+    assert(ownerBefore == ownerAfter)
+  }
+
+  test("V2: ALTER VIEW re-captures the current session's SQL configs") {
+    val view = s"$catalog.$namespace.v2_alter_reconfig"
+    withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") {
+      sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    }
+    withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+      sql(s"ALTER VIEW $view AS SELECT 2 AS x")
+    }
+    val stored = viewCatalog.getStoredView(Array(namespace), "v2_alter_reconfig")
+    val captured = stored.sqlConfigs().get(SQLConf.ANSI_ENABLED.key)
+    assert(captured == "true",
+      s"expected ALTER VIEW to re-capture ansi=true; got $captured")
+  }
+
+  test("V2: ALTER VIEW on non-ViewCatalog catalog fails with MISSING_CATALOG_ABILITY") {
+    withSQLConf(
+      "spark.sql.catalog.no_view_alter_cat" -> classOf[BasicInMemoryTableCatalog].getName) {
+      val ex = intercept[AnalysisException] {
+        sql("ALTER VIEW no_view_alter_cat.default.does_not_matter AS SELECT 1")
+      }
+      assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewRenameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewRenameSuite.scala
new file mode 100644
index 0000000000000..54174387ef3b9
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewRenameSuite.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.connector.catalog.Identifier
+import org.apache.spark.sql.execution.command
+
+class AlterViewRenameSuite
+  extends command.AlterViewRenameSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: catalog state moves the entry between identifiers") {
+    val src = s"$catalog.$namespace.v2_rename_src"
+    createView(src)
+    sql(s"ALTER VIEW $src RENAME TO v2_rename_dst")
+    assert(!viewCatalog.viewExists(Identifier.of(Array(namespace), "v2_rename_src")))
+    assert(viewCatalog.viewExists(Identifier.of(Array(namespace), "v2_rename_dst")))
+  }
+
+  test("V2: rename to a 2-part target moves the view across namespaces") {
+    // Exercises `RenameV2ViewExec`'s non-empty-namespace branch -- when the new identifier
+    // already carries a namespace, the exec passes it through verbatim rather than defaulting
+    // to the source namespace.
+    val src = s"$catalog.$namespace.v2_rename_xns_src"
+    val otherNs = "other_ns"
+    sql(s"CREATE NAMESPACE IF NOT EXISTS $catalog.$otherNs")
+    createView(src)
+    try {
+      sql(s"ALTER VIEW $src RENAME TO $otherNs.v2_rename_xns_dst")
+      assert(!viewCatalog.viewExists(Identifier.of(Array(namespace), "v2_rename_xns_src")))
+      assert(viewCatalog.viewExists(Identifier.of(Array(otherNs), "v2_rename_xns_dst")))
+    } finally {
+      sql(s"DROP VIEW IF EXISTS $catalog.$otherNs.v2_rename_xns_dst")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewSchemaBindingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewSchemaBindingSuite.scala
new file mode 100644
index 0000000000000..517880047d256
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewSchemaBindingSuite.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class AlterViewSchemaBindingSuite
+  extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: catalog stores the new schema mode on ViewInfo") {
+    val view = s"$catalog.$namespace.v2_schema_mode"
+    createView(view)
+    sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+    val stored = viewCatalog.getStoredView(Array(namespace), "v2_schema_mode")
+    assert(stored.schemaMode == "EVOLUTION")
+  }
+
+  test("V2: switching to EVOLUTION clears queryColumnNames; switching back restores them") {
+    // Mirrors v1 `generateViewProperties`: in EVOLUTION mode the view always uses its current
+    // schema as the column source, so persisting `queryColumnNames` would be non-canonical.
+    // After flipping back to BINDING via a CREATE OR REPLACE (which re-captures column names),
+    // the field must be populated again so view-text expansion has the original aliases.
+    val name = "v2_schema_mode_qcols"
+    val view = s"$catalog.$namespace.$name"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x, 2 AS y")
+    val initial = viewCatalog.getStoredView(Array(namespace), name)
+    assert(initial.queryColumnNames.toSeq == Seq("x", "y"))
+
+    sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION")
+    val afterEvo = viewCatalog.getStoredView(Array(namespace), name)
+    assert(afterEvo.schemaMode == "EVOLUTION")
+    assert(afterEvo.queryColumnNames.isEmpty,
+      "queryColumnNames must be cleared in EVOLUTION mode")
+
+    sql(s"ALTER VIEW $view WITH SCHEMA BINDING")
+    val afterBinding = viewCatalog.getStoredView(Array(namespace), name)
+    assert(afterBinding.schemaMode == "BINDING")
+    // ALTER VIEW WITH SCHEMA BINDING does not re-analyze the view body; the queryColumnNames
+    // field stays at whatever ALTER VIEW WITH SCHEMA EVOLUTION left it as. Users who want the
+    // original aliases back run CREATE OR REPLACE VIEW, which re-captures them.
+    assert(afterBinding.queryColumnNames.isEmpty)
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewSetTblPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewSetTblPropertiesSuite.scala
new file mode 100644
index 0000000000000..642d8d46b7fac
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewSetTblPropertiesSuite.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+/**
+ * The class contains tests for the `ALTER VIEW ... SET TBLPROPERTIES` command on V2 view
+ * catalogs (`AlterV2ViewSetPropertiesExec`).
+ */
+class AlterViewSetTblPropertiesSuite
+  extends command.AlterViewSetTblPropertiesSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: catalog stores the property on ViewInfo") {
+    val view = s"$catalog.$namespace.v2_set_view_info"
+    createView(view)
+    sql(s"ALTER VIEW $view SET TBLPROPERTIES ('k' = 'v')")
+    val stored = viewCatalog.getStoredView(Array(namespace), "v2_set_view_info")
+    assert(stored.properties.get("k") == "v")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewUnsetTblPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewUnsetTblPropertiesSuite.scala
new file mode 100644
index 0000000000000..0d7f13007e9f5
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterViewUnsetTblPropertiesSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class AlterViewUnsetTblPropertiesSuite
+  extends command.AlterViewUnsetTblPropertiesSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: unset removes the entry from the stored ViewInfo") {
+    val view = s"$catalog.$namespace.v2_unset_view_info"
+    createViewWithProps(view, "k" -> "v")
+    sql(s"ALTER VIEW $view UNSET TBLPROPERTIES ('k')")
+    val stored = viewCatalog.getStoredView(Array(namespace), "v2_unset_view_info")
+    assert(!stored.properties.containsKey("k"))
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateNamespaceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateNamespaceSuite.scala
index 6b5475a1e2674..973676fe1f63b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateNamespaceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateNamespaceSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.command.v2
 
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.connector.catalog.ValidatingInMemoryTableCatalog
 import org.apache.spark.sql.execution.command
 
 /**
@@ -24,4 +26,23 @@ import org.apache.spark.sql.execution.command
  */
 class CreateNamespaceSuite extends command.CreateNamespaceSuiteBase with CommandSuiteBase {
   override def namespace: String = "ns1.ns2"
+
+  // A test catalog whose createNamespace validates before checking existence; used to
+  // exercise CreateNamespaceExec's IF NOT EXISTS recovery path.
+  private val validatingCatalog: String = "validating_test_catalog"
+
+  override def sparkConf: SparkConf = super.sparkConf
+    .set(s"spark.sql.catalog.$validatingCatalog",
+      classOf[ValidatingInMemoryTableCatalog].getName)
+
+  test("SPARK-55250: IF NOT EXISTS is a no-op on pre-existing namespace even when the " +
+    "catalog raises a non-NamespaceAlreadyExistsException error") {
+    val ns = s"$validatingCatalog.$namespace"
+    withNamespace(ns) {
+      sql(s"CREATE NAMESPACE $ns")
+      // Without the IF NOT EXISTS recovery path, this would surface the catalog's
+      // pre-existence validation error.
+      sql(s"CREATE NAMESPACE IF NOT EXISTS $ns")
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateViewSuite.scala
new file mode 100644
index 0000000000000..1750a538b1d4c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CreateViewSuite.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connector.catalog.{BasicInMemoryTableCatalog, TableCatalog, ViewInfo}
+import org.apache.spark.sql.execution.command
+
+class CreateViewSuite extends command.CreateViewSuiteBase with ViewCommandSuiteBase {
+  import testImplicits._
+
+  test("V2: CREATE VIEW propagates DEFAULT COLLATION onto the stored ViewInfo") {
+    val view = s"$catalog.$namespace.v2_create_collation"
+    withTable("spark_catalog.default.src_coll") {
+      Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.src_coll")
+      sql(s"CREATE VIEW $view DEFAULT COLLATION UTF8_BINARY AS " +
+        s"SELECT col FROM spark_catalog.default.src_coll")
+      val stored = viewCatalog.getStoredView(Array(namespace), "v2_create_collation")
+      assert(stored.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_BINARY")
+    }
+  }
+
+  test("V2: CREATE VIEW stamps PROP_OWNER on the stored TableInfo") {
+    val view = s"$catalog.$namespace.v2_create_owner"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val stored = viewCatalog.getStoredView(Array(namespace), "v2_create_owner")
+    assert(stored.properties().containsKey(TableCatalog.PROP_OWNER))
+  }
+
+  test("V2: CREATE VIEW on a non-ViewCatalog catalog fails with MISSING_CATALOG_ABILITY.VIEWS") {
+    withSQLConf(
+      "spark.sql.catalog.no_view_cat" -> classOf[BasicInMemoryTableCatalog].getName) {
+      val ex = intercept[AnalysisException] {
+        sql("CREATE VIEW no_view_cat.default.v AS SELECT 1")
+      }
+      assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS")
+    }
+  }
+
+  test("V2: CREATE VIEW IF NOT EXISTS over a table leaves the underlying TableInfo untouched") {
+    // The Base version of this scenario asserts the SQL behavior (errors / no-op);
+    // here we additionally pin the v2-only post-condition that the persisted entry under
+    // the colliding identifier remains a `TableInfo` and is NOT silently swapped for a
+    // `ViewInfo` by the IF NOT EXISTS path.
+    val name = "v2_ifne_keeps_table"
+    val view = s"$catalog.$namespace.$name"
+    withSeededTable(view) {
+      sql(s"CREATE VIEW IF NOT EXISTS $view AS SELECT 1 AS col")
+      val stored = viewCatalog.getStoredInfo(Array(namespace), name)
+      assert(!stored.isInstanceOf[ViewInfo])
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
index 1bd6df5cf928a..2e8fe7a4c0237 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
@@ -206,7 +206,10 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase
           Row("_partition", "string", "Partition key used to store the row"),
           Row("", "", ""),
           Row("# Detailed Table Information", "", ""),
-          Row("Name", tbl, ""),
+          Row("Catalog", catalog, ""),
+          Row("Namespace", "ns", ""),
+          Row("Database", "ns", ""),
+          Row("Table", "table", ""),
           Row("Type", "MANAGED", ""),
           Row("Comment", "this is a test table", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),
@@ -217,6 +220,45 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase
     }
   }
 
+  test("DESCRIBE TABLE EXTENDED emits structured Catalog/Namespace/Table rows") {
+    // Pin that DescribeTableExec emits the resolved-identifier components as separate
+    // rows under the `# Detailed Table Information` block, so consumers can read each
+    // part programmatically rather than splitting a concatenated identifier string. Also
+    // pin that for a single-segment namespace, an additional `Database` row is emitted
+    // alongside `Namespace` for v1 compatibility (mirroring v1 `CatalogTable` output).
+    withNamespaceAndTable("ns", "table") { tbl =>
+      sql(s"CREATE TABLE $tbl (id bigint) $defaultUsing")
+      val rows = sql(s"DESCRIBE TABLE EXTENDED $tbl").collect()
+      def findRowValue(name: String): String = rows
+        .find(_.getString(0) == name)
+        .getOrElse(fail(s"DESCRIBE output missing the `$name` row"))
+        .getString(1)
+      assert(findRowValue("Catalog") == catalog)
+      assert(findRowValue("Namespace") == "ns")
+      assert(findRowValue("Database") == "ns",
+        "single-segment namespace must also surface as a `Database` row for v1 parity")
+      assert(findRowValue("Table") == "table")
+    }
+  }
+
+  test("DESCRIBE TABLE EXTENDED with a multi-segment namespace surfaces the leaf " +
+      "segment in `Database` and joins `Namespace` with dots") {
+    // Multi-segment v2 namespaces still emit a `Database` row for v1 compatibility,
+    // carrying the trailing namespace segment. `Namespace` carries the full dot-joined
+    // form for consumers that need the complete path (with `quoteIfNeeded` applied per
+    // segment -- not exercised here because both segments are valid bare identifiers).
+    withNamespaceAndTable("ns1.ns2", "table") { tbl =>
+      sql(s"CREATE TABLE $tbl (id bigint) $defaultUsing")
+      val rows = sql(s"DESCRIBE TABLE EXTENDED $tbl").collect()
+      val byName = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+      assert(byName.get("Catalog").contains(catalog))
+      assert(byName.get("Namespace").contains("ns1.ns2"))
+      assert(byName.get("Database").contains("ns2"),
+        "multi-segment namespace must surface the trailing segment as `Database`")
+      assert(byName.get("Table").contains("table"))
+    }
+  }
+
   test("describe a non-existent column") {
     withNamespaceAndTable("ns", "tbl") { tbl =>
       sql(s"""
@@ -360,21 +402,20 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase
              |$defaultUsing
         """.stripMargin)
 
-        // Skipped showing NORELY since it is the default value.
+        // ENFORCED/NOT ENFORCED and RELY/NORELY are always emitted to match SHOW CREATE TABLE.
         var expectedConstraintsDdl = Array(
           "# Constraints,,",
-          "pk_table_pk,PRIMARY KEY (id) NOT ENFORCED,",
+          "pk_table_pk,PRIMARY KEY (id) NOT ENFORCED NORELY,",
           s"fk_a,FOREIGN KEY (a) REFERENCES $fkTable (id) NOT ENFORCED RELY,",
-          "uk_b,UNIQUE (b) NOT ENFORCED,",
-          "uk_a_c,UNIQUE (a, c) NOT ENFORCED,",
-          "c1,CHECK (c IS NOT NULL) ENFORCED,",
-          "c2,CHECK (id > 0) ENFORCED,"
+          "uk_b,UNIQUE (b) NOT ENFORCED NORELY,",
+          "uk_a_c,UNIQUE (a, c) NOT ENFORCED NORELY,",
+          "c1,CHECK (c IS NOT NULL) ENFORCED NORELY,",
+          "c2,CHECK (id > 0) ENFORCED NORELY,"
         )
         var descDdL = sql(s"DESCRIBE EXTENDED $tbl").collect().map(_.mkString(","))
           .dropWhile(_ != "# Constraints,,")
         assert(descDdL === expectedConstraintsDdl)
 
-        // Show non-default value for RELY.
         sql(s"ALTER TABLE $tbl ADD CONSTRAINT c3 CHECK (b IS NOT NULL) RELY")
         descDdL = sql(s"DESCRIBE EXTENDED $tbl").collect().map(_.mkString(","))
           .dropWhile(_ != "# Constraints,,")
@@ -386,7 +427,7 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase
         descDdL = sql(s"DESCRIBE EXTENDED $tbl").collect().map(_.mkString(","))
           .dropWhile(_ != "# Constraints,,")
         assert(descDdL === expectedConstraintsDdl
-          .filter(_ != "c1,CHECK (c IS NOT NULL) ENFORCED,"))
+          .filter(_ != "c1,CHECK (c IS NOT NULL) ENFORCED NORELY,"))
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeViewColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeViewColumnSuite.scala
new file mode 100644
index 0000000000000..ab0654d672f09
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeViewColumnSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class DescribeViewColumnSuite
+  extends command.DescribeViewColumnSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeViewSuite.scala
new file mode 100644
index 0000000000000..346c2cb22f5b6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeViewSuite.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class DescribeViewSuite
+  extends command.DescribeViewSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: extended emits the v2-native `# Detailed View Information` header") {
+    // v1 emits `# Detailed Table Information` for views (CatalogTableType.VIEW shares the
+    // same describe path as CatalogTableType.{MANAGED,EXTERNAL}); v2's `DescribeV2ViewExec`
+    // routes views to a dedicated header. Pin the v2-side text here so the divergence stays
+    // intentional.
+    val view = s"$catalog.$namespace.v2_desc_ext_header"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val rows = sql(s"DESCRIBE TABLE EXTENDED $view").collect().map(_.getString(0))
+    assert(rows.contains("# Detailed View Information"),
+      s"v2 extended describe should emit the View header; got:\n${rows.mkString("\n")}")
+  }
+
+  test("V2: extended emits structured Catalog/Namespace/View rows") {
+    // Pin the v2 view layout: structured `Catalog`/`Namespace`/`View` rows under
+    // `# Detailed View Information`, plus the v1-compat `Database` row when the namespace
+    // is a single segment. Mirrors the table-side pin in `DescribeTableSuite`.
+    val view = s"$catalog.$namespace.v2_desc_struct_rows"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    val rows = sql(s"DESCRIBE TABLE EXTENDED $view").collect()
+    val byName = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+    assert(byName.get("Catalog").contains(catalog))
+    assert(byName.get("Namespace").contains(namespace))
+    assert(byName.get("Database").contains(namespace),
+      "single-segment namespace must also surface as a `Database` row for v1 parity")
+    assert(byName.get("View").contains("v2_desc_struct_rows"))
+  }
+
+  test("V2: extended on a multi-segment namespace surfaces the leaf segment in " +
+      "`Database` and joins `Namespace` with dots") {
+    // Multi-segment v2 namespaces still emit a `Database` row for v1 compatibility,
+    // carrying the trailing namespace segment. `Namespace` carries the full dot-joined
+    // form for consumers that need the complete path.
+    val ns = s"$catalog.ns1.ns2"
+    withNamespace(ns) {
+      sql(s"CREATE NAMESPACE IF NOT EXISTS $ns")
+      val view = s"$ns.v2_desc_multi_ns"
+      sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+      val rows = sql(s"DESCRIBE TABLE EXTENDED $view").collect()
+      val byName = rows.map(r => r.getString(0) -> r.getString(1)).toMap
+      assert(byName.get("Catalog").contains(catalog))
+      assert(byName.get("Namespace").contains("ns1.ns2"))
+      assert(byName.get("Database").contains("ns2"),
+        "multi-segment namespace must surface the trailing segment as `Database`")
+      assert(byName.get("View").contains("v2_desc_multi_ns"))
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala
index ffc2c6c679a8b..0e5cbb861d05d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala
@@ -41,6 +41,13 @@ class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase {
     }
   }
 
+  test("DROP TABLE IF EXISTS ... PURGE on a missing table is a no-op") {
+    // The default TableCatalog.purgeTable throws unconditionally, so without an upfront
+    // existence guard `IF EXISTS` would surface UNSUPPORTED_FEATURE.PURGE_TABLE for missing
+    // tables -- defeating the IF EXISTS contract on catalogs that do not support purge.
+    sql(s"DROP TABLE IF EXISTS $catalog.ns.never_existed PURGE")
+  }
+
   test("table qualified with the session catalog name") {
     withSQLConf(
       V2_SESSION_CATALOG_IMPLEMENTATION.key -> classOf[InMemoryTableSessionCatalog].getName) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropViewSuite.scala
new file mode 100644
index 0000000000000..cf8ec930fddb1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropViewSuite.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connector.catalog.{BasicInMemoryTableCatalog, Identifier}
+import org.apache.spark.sql.execution.command
+
+class DropViewSuite extends command.DropViewSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: drop removes the entry from the catalog store") {
+    val view = s"$catalog.$namespace.v2_drop_remove"
+    sql(s"CREATE VIEW $view AS SELECT 1 AS x")
+    assert(viewCatalog.viewExists(Identifier.of(Array(namespace), "v2_drop_remove")))
+    sql(s"DROP VIEW $view")
+    assert(!viewCatalog.viewExists(Identifier.of(Array(namespace), "v2_drop_remove")))
+  }
+
+  test("V2: DROP VIEW on a non-view table entry leaves the table untouched") {
+    // The Base version of this scenario asserts the SQL behavior (rejection with
+    // EXPECT_VIEW_NOT_TABLE); here we additionally pin the v2-only post-condition that
+    // the underlying entry under the colliding identifier remains a table and was not
+    // silently dropped by the rejected DROP VIEW.
+    val name = "v2_drop_keeps_table"
+    val view = s"$catalog.$namespace.$name"
+    val ident = Identifier.of(Array(namespace), name)
+    withSeededTable(view) {
+      intercept[AnalysisException](sql(s"DROP VIEW $view"))
+      assert(viewCatalog.tableExists(ident))
+    }
+  }
+
+  test("V2: DROP VIEW on a non-ViewCatalog catalog fails") {
+    withSQLConf(
+      "spark.sql.catalog.no_view_drop_cat" -> classOf[BasicInMemoryTableCatalog].getName) {
+      val ex = intercept[AnalysisException] {
+        sql("DROP VIEW no_view_drop_cat.default.v")
+      }
+      // Resolution fails because the catalog cannot host views; the exact error condition
+      // depends on the resolver's not-found vs. capability-gate ordering.
+      assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).matches(
+        ".*(no such|not found|missing|not a view|cannot find|view).*"),
+        s"unexpected error: ${ex.getMessage}")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
index 1ffe781712825..e2ce378f21cc5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateTableSuite.scala
@@ -138,9 +138,26 @@ class ShowCreateTableSuite extends command.ShowCreateTableSuiteBase with Command
         "b STRING COLLATE UTF8_BINARY,",
         "ts TIMESTAMP)",
         defaultUsing,
-        "PARTITIONED BY (a, years(ts), months(ts), days(ts), hours(ts))",
-        "CLUSTERED BY (b)",
-        "INTO 16 BUCKETS"
+        "PARTITIONED BY (a, bucket(16, b), years(ts), months(ts), days(ts), hours(ts))"
+      ))
+    }
+  }
+
+  test("SPARK-56755: show create table[partitioned by multi bucket transforms]") {
+    withNamespaceAndTable(ns, table) { t =>
+      sql(
+        s"""
+           |CREATE TABLE $t (a INT, b STRING, ts TIMESTAMP) $defaultUsing
+           |PARTITIONED BY (bucket(4, a), bucket(8, b), years(ts))
+         """.stripMargin)
+      val showDDL = getShowCreateDDL(t, false)
+      assert(showDDL === Array(
+        s"CREATE TABLE $t (",
+        "a INT,",
+        "b STRING COLLATE UTF8_BINARY,",
+        "ts TIMESTAMP)",
+        defaultUsing,
+        "PARTITIONED BY (bucket(4, a), bucket(8, b), years(ts))"
       ))
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateViewSuite.scala
new file mode 100644
index 0000000000000..39bde3c7d58cc
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowCreateViewSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class ShowCreateViewSuite
+  extends command.ShowCreateViewSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewColumnsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewColumnsSuite.scala
new file mode 100644
index 0000000000000..92c90573cecd7
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewColumnsSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class ShowViewColumnsSuite
+  extends command.ShowViewColumnsSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewPropertiesSuite.scala
new file mode 100644
index 0000000000000..44420167d3a7f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewPropertiesSuite.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.execution.command
+
+class ShowViewPropertiesSuite
+  extends command.ShowViewPropertiesSuiteBase with ViewCommandSuiteBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewsSuite.scala
new file mode 100644
index 0000000000000..a3e5533eeb3be
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowViewsSuite.scala
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.connector.catalog.BasicInMemoryTableCatalog
+import org.apache.spark.sql.execution.command
+
+class ShowViewsSuite extends command.ShowViewsSuiteBase with ViewCommandSuiteBase {
+
+  test("V2: SHOW VIEWS on a non-ViewCatalog catalog fails") {
+    withSQLConf(
+      "spark.sql.catalog.no_view_show_cat" -> classOf[BasicInMemoryTableCatalog].getName) {
+      val ex = intercept[AnalysisException] {
+        sql("SHOW VIEWS IN no_view_show_cat.default")
+      }
+      assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ViewCommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ViewCommandSuiteBase.scala
new file mode 100644
index 0000000000000..4dbbbc21afa9f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ViewCommandSuiteBase.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command.v2
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.connector.catalog.InMemoryTableViewCatalog
+
+/**
+ * Settings for v2 view command test suites. Extends v2 [[CommandSuiteBase]] (so view tests
+ * inherit `checkLocation` and the standard v2 `test_catalog` configuration), and additionally
+ * wires `test_view_catalog` to [[InMemoryTableViewCatalog]] -- the catalog that the unified
+ * `*SuiteBase` view tests under `command/` target via the `$catalog` placeholder.
+ */
+trait ViewCommandSuiteBase extends CommandSuiteBase {
+  override def catalog: String = "test_view_catalog"
+
+  override def sparkConf: SparkConf = super.sparkConf
+    .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableViewCatalog].getName)
+
+  /** Helper: returns the configured `InMemoryTableViewCatalog`. */
+  protected def viewCatalog: InMemoryTableViewCatalog =
+    spark.sessionState.catalogManager.catalog(catalog).asInstanceOf[InMemoryTableViewCatalog]
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala
index 1150f6163b978..f4de8a52810e4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala
@@ -31,7 +31,8 @@ import org.mockito.Mockito.{mock, when}
 
 import org.apache.spark.{SparkException, SparkRuntimeException}
 import org.apache.spark.metrics.source.HiveCatalogMetrics
-import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
@@ -657,6 +658,28 @@ class FileIndexSuite extends SharedSparkSession {
     assert(FileIndexOptions.isValidOption("pathglobfilter"))
   }
 
+  test("recursiveFileLookup with a user-specified partition spec is rejected") {
+    withTempDir { dir =>
+      val partitionSchema = StructType(Seq(StructField("year", IntegerType, nullable = true)))
+      val partitionSpec = PartitionSpec(
+        partitionSchema,
+        Seq(PartitionPath(InternalRow(2024), new Path(dir.getCanonicalPath))))
+      val fileIndex = new InMemoryFileIndex(
+        spark,
+        rootPathsSpecified = Seq(new Path(dir.getCanonicalPath)),
+        parameters = Map("recursiveFileLookup" -> "true"),
+        userSpecifiedSchema = None,
+        userSpecifiedPartitionSpec = Some(partitionSpec))
+      checkError(
+        exception = intercept[AnalysisException] {
+          fileIndex.listFiles(Nil, Nil)
+        },
+        condition = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE",
+        parameters = Map.empty[String, String]
+      )
+    }
+  }
+
   test("SPARK-52339: Correctly compare root paths") {
     withTempDir { dir =>
       val file1 = new File(dir, "text1.txt")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCustomMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCustomMetadataStructSuite.scala
index 8f0a000f4517d..1ace87deab471 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCustomMetadataStructSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCustomMetadataStructSuite.scala
@@ -24,11 +24,13 @@ import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Expression, FileSourceConstantMetadataStructField, FileSourceGeneratedMetadataStructField, Literal}
+import org.apache.spark.sql.catalyst.util.GenericArrayData
 import org.apache.spark.sql.classic.{DataFrame, Dataset}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 import org.apache.spark.sql.functions.{col, lit, when}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType}
+import org.apache.spark.sql.types.{ArrayType, IntegerType, LongType, StringType, StructField, StructType}
 import org.apache.spark.unsafe.types.UTF8String
 
 /** Verifies the ability for a FileFormat to define custom metadata types */
@@ -336,6 +338,59 @@ class FileSourceCustomMetadataStructSuite extends SharedSparkSession {
     }
   }
 
+  test("[SPARK-56931] complex constant metadata fields (array<struct>, struct) on row path") {
+    withTempData("parquet", FILE_SCHEMA) { (_, f0, f1) =>
+      val permElement = StructType(Seq(
+        StructField("email", StringType),
+        StructField("role", StringType)))
+      val locationStruct = StructType(Seq(
+        StructField("country", StringType),
+        StructField("city", StringType)))
+      val complexFields = Seq(
+        FileSourceConstantMetadataStructField("perms", ArrayType(permElement, containsNull = true)),
+        FileSourceConstantMetadataStructField("location", locationStruct))
+      val format = new TestFileFormat(complexFields)
+
+      // Build per-file values in catalyst form.
+      def perms(email: String, role: String): InternalRow =
+        InternalRow(UTF8String.fromString(email), UTF8String.fromString(role))
+      def loc(country: String, city: String): InternalRow =
+        InternalRow(UTF8String.fromString(country), UTF8String.fromString(city))
+
+      val files = Seq(
+        FileStatusWithMetadata(f0, Map(
+          "perms" -> new GenericArrayData(Array[Any](perms("a@x", "r"), perms("b@x", "w"))),
+          "location" -> loc("US", "SFO"))),
+        FileStatusWithMetadata(f1, Map(
+          "perms" -> new GenericArrayData(Array[Any](perms("c@x", "r"), perms("d@x", "o"))),
+          "location" -> loc("CA", "YYZ"))))
+      val df = createDF(format, files)
+
+      // Force the row materialization path (Batched=false) so we exercise the
+      // updateMetadataInternalRow -> getFileConstantMetadataColumnValue -> Literal.create
+      // change end-to-end. The query touches a subset of each subfield to also exercise
+      // the metadata-schema pruning preservation rule.
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
+        // Query only the non-first sub-fields of each complex column. A buggy implementation
+        // that pruned the kept sub-attribute's inner schema down to only the queried fields
+        // would surface here: the extractor still produces `InternalRow("US", "SFO")` /
+        // `InternalRow(email, role)`, and reading the kept field at the pruned (now zero)
+        // ordinal would yield the index-0 value instead of the index-1 value.
+        checkAnswer(
+          df.selectExpr(
+            "fileNum",
+            "_metadata.perms[1].role AS second_role",
+            "_metadata.location.city AS city",
+            "size(_metadata.perms) AS perms_count"),
+          Seq(
+            Row(0, "w", "SFO", 2),
+            Row(0, "w", "SFO", 2),
+            Row(1, "o", "YYZ", 2),
+            Row(1, "o", "YYZ", 2)))
+      }
+    }
+  }
+
   test("generated columns and extractors take precedence over metadata map values") {
     withTempData("parquet", FILE_SCHEMA) { (_, f0, f1) =>
       import FileFormat.{FILE_NAME, FILE_SIZE}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcTaskInterruptSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcTaskInterruptSuite.scala
index 29da58136fcb4..3005903482284 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcTaskInterruptSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcTaskInterruptSuite.scala
@@ -289,7 +289,7 @@ class JdbcTaskInterruptSuite extends SparkFunSuite with SharedSparkSession {
         options = options,
         databaseMetadata = JDBCDatabaseMetadata.fromJDBCConnectionFactory(getConnection),
         groupByColumns = None,
-        sample = None,
+        sampleClause = None,
         limit = 0,
         sortOrders = Array.empty,
         offset = 0,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala
index 9b34e5011b5dd..d62ef9c3f197d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala
@@ -125,13 +125,13 @@ class ParquetDeltaByteArrayEncodingSuite extends ParquetCompatibilityTest with S
 
     for (i <- 0 until length) {
       val actualWkb = if (isGeometry) {
-        val geom = writableColumnVector.getGeometry(i)
-        assert(srid === STUtils.stSrid(geom))
-        STUtils.stAsBinary(geom)
+        val geom = writableColumnVector.getBinaryView(i)
+        assert(srid === STUtils.stGeomSrid(geom))
+        STUtils.stGeomAsBinary(geom)
       } else {
-        val geog = writableColumnVector.getGeography(i)
-        assert(srid === STUtils.stSrid(geog))
-        STUtils.stAsBinary(geog)
+        val geog = writableColumnVector.getBinaryView(i)
+        assert(srid === STUtils.stGeogSrid(geog))
+        STUtils.stGeogAsBinary(geog)
       }
       assert(wkbValues(i) sameElements actualWkb)
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala
index b9009f59f69a2..fd7dcabb47984 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala
@@ -152,13 +152,13 @@ class ParquetDeltaLengthByteArrayEncodingSuite
 
     for (i <- 0 until length) {
       val actualWkb = if (isGeometry) {
-        val geom = writableColumnVector.getGeometry(i)
-        assert(srid === STUtils.stSrid(geom))
-        STUtils.stAsBinary(geom)
+        val geom = writableColumnVector.getBinaryView(i)
+        assert(srid === STUtils.stGeomSrid(geom))
+        STUtils.stGeomAsBinary(geom)
       } else {
-        val geog = writableColumnVector.getGeography(i)
-        assert(srid === STUtils.stSrid(geog))
-        STUtils.stAsBinary(geog)
+        val geog = writableColumnVector.getBinaryView(i)
+        assert(srid === STUtils.stGeogSrid(geog))
+        STUtils.stGeogAsBinary(geog)
       }
       assert(wkbValues(i) sameElements actualWkb)
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala
index 28b76e21ec071..b0ad5ca8cab71 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala
@@ -43,7 +43,7 @@ abstract class ParquetFileFormatSuite
 
   private def checkCannotReadFooterError(body: => Unit): Unit = {
     checkErrorMatchPVals(
-      exception = intercept[SparkException] { body }.getCause.asInstanceOf[SparkException],
+      exception = intercept[SparkException] { body },
       condition = "FAILED_READ_FILE.CANNOT_READ_FILE_FOOTER",
       parameters = Map("path" -> "file:.*")
     )
@@ -96,10 +96,12 @@ abstract class ParquetFileFormatSuite
     }
 
     testReadFooters(true)
+    // With preserveSparkThrowable=true, the structured error class is thrown directly
+    // without being wrapped in a generic SparkException by awaitResult.
     checkErrorMatchPVals(
       exception = intercept[SparkException] {
         testReadFooters(false)
-      }.getCause.asInstanceOf[SparkException],
+      },
       condition = "FAILED_READ_FILE.CANNOT_READ_FILE_FOOTER",
       parameters = Map("path" -> "file:.*")
     )
@@ -142,7 +144,7 @@ abstract class ParquetFileFormatSuite
       exception = intercept[SparkException] {
         ParquetFileFormat.readParquetFootersInParallel(
           conf, Seq(fakeStatus), ignoreCorruptFiles = false, ignoreMissingFiles = false)
-      }.getCause.asInstanceOf[SparkException],
+      },
       condition = "FAILED_READ_FILE.CANNOT_READ_FILE_FOOTER",
       parameters = Map("path" -> s"${WrappingFNFLocalFileSystem.scheme}:.*")
     )
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetGeoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetGeoSuite.scala
index 107b5b7675b1b..057f722f215c8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetGeoSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetGeoSuite.scala
@@ -22,8 +22,9 @@ import java.io.File
 import org.apache.parquet.hadoop.ParquetOutputFormat
 
 import org.apache.spark.sql.{DataFrame, Row}
-import org.apache.spark.sql.functions.{st_asbinary, st_geogfromwkb, st_geomfromwkb}
+import org.apache.spark.sql.functions.{st_asbinary, st_geogfromwkb, st_geomfromwkb, st_srid}
 import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{Geography, GeographyType, StructField, StructType}
 
 class ParquetGeoSuite
     extends ParquetCompatibilityTest
@@ -98,6 +99,32 @@ class ParquetGeoSuite
     testReadWrite(Seq(point1Wkb, line1Wkb, makePolygonWkb()))
   }
 
+  test("geography preserves non-default SRID through Parquet round-trip") {
+    // Geography supports a variety of geographic SRIDs beyond the default 4326. Verify that the
+    // SRID is preserved when written to and read back from Parquet, across the row-based reader
+    // (ParquetGeographyConverter) and the vectorized reader (WKBToGeographyConverter).
+    Seq(4267, 4269, 4326).foreach { srid =>
+      withTempDir { dir =>
+        val schema = StructType(Seq(
+          StructField("geog", GeographyType(srid), nullable = true)))
+        val wkbValues = Seq(point0Wkb, point1Wkb, line0Wkb)
+        val rdd = sparkContext.parallelize(
+          wkbValues.map(wkb => Row(Geography.fromWKB(wkb, srid))))
+        val df = spark.createDataFrame(rdd, schema)
+        withAllParquetWriters {
+          df.write.mode("overwrite").parquet(dir.getAbsolutePath)
+          withAllParquetReaders {
+            // Verify both the WKB payload and the SRID round-trip correctly.
+            checkAnswer(
+              spark.read.parquet(dir.getAbsolutePath)
+                .select(st_asbinary($"geog"), st_srid($"geog")),
+              wkbValues.map(wkb => Row(wkb, srid)))
+          }
+        }
+      }
+    }
+  }
+
   test("dictionary encoding") {
     val wkbValues = Seq(
       point0Wkb,
@@ -119,4 +146,36 @@ class ParquetGeoSuite
       }
     }
   }
+
+  test("geography preserves non-default SRID with dictionary encoding") {
+    // Force dictionary encoding by repeating a small number of values many times. This exercises
+    // the setDictionary path of ParquetGeographyConverter and the dictionary path of
+    // WKBToGeographyConverter, both of which must materialize geographies with the column's SRID.
+    val srid = 4267
+    val wkbValues = Seq(point0Wkb, point1Wkb, line0Wkb)
+    val repeatedWkbs = List.fill(10000)(wkbValues).flatten
+    val schema = StructType(Seq(
+      StructField("geog", GeographyType(srid), nullable = true)))
+    val rdd = sparkContext.parallelize(
+      repeatedWkbs.map(wkb => Row(Geography.fromWKB(wkb, srid))))
+    val df = spark.createDataFrame(rdd, schema)
+
+    Seq(true, false).foreach { useDictionary =>
+      withSQLConf(ParquetOutputFormat.ENABLE_DICTIONARY -> useDictionary.toString) {
+        withTempDir { dir =>
+          withAllParquetWriters {
+            df.write.mode("overwrite").parquet(dir.getAbsolutePath)
+            withAllParquetReaders {
+              // Aggregate-style assertion to keep the comparison cheap on 30K rows: every row
+              // should round-trip with the original SRID.
+              val readBack = spark.read.parquet(dir.getAbsolutePath)
+                .select(st_srid($"geog").as("srid"))
+              assert(readBack.count() === repeatedWkbs.length)
+              assert(readBack.where($"srid" =!= srid).count() === 0)
+            }
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 44e6ae4da6a5b..e91562d097ff1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -1887,6 +1887,46 @@ class ParquetIOSuite extends ParquetTest with SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-56872: INT64 DECIMAL into 32-bit Decimal column with dictionary fallback") {
+    // `DowncastLongUpdater.decodeSingleDictionaryId` only runs when the vectorized reader has
+    // to eagerly drain buffered dictionary IDs, which happens when parquet-mr writes the
+    // column as a mix of dictionary-encoded and PLAIN pages. The mix-cardinality values below
+    // (4-value pool + unique-per-row) force that fallback; uniformly low- or high-cardinality
+    // data bypasses the path. INT64 DECIMAL(p<=9) is built via parquet-mr's low-level writer
+    // because Spark's own writer emits INT32 for that case.
+    val schema = MessageTypeParser.parseMessageType(
+      """message root {
+        |  required int64 v (DECIMAL(9, 2));
+        |}""".stripMargin)
+    def unscaledAt(i: Int): Long = i % 5 match {
+      case 0 => -999_999_999L
+      case 1 => -1L
+      case 2 => 0L
+      case 3 => 999_999_999L
+      case _ => i.toLong * 13L - 7L
+    }
+    withTempDir { dir =>
+      val tablePath = new Path(s"${dir.getCanonicalPath}/dec.parquet")
+      val writer = createParquetWriter(schema, tablePath, dictionaryEnabled = true)
+      val numRecords = 5000
+      (0 until numRecords).foreach { i =>
+        val record = new SimpleGroup(schema)
+        record.add(0, unscaledAt(i))
+        writer.write(record)
+      }
+      writer.close()
+
+      withAllParquetReaders {
+        val readSchema = new StructType().add("v", DecimalType(9, 2), nullable = false)
+        val df = spark.read.schema(readSchema).parquet(tablePath.toString)
+        val expected = (0 until numRecords).map { i =>
+          Row(java.math.BigDecimal.valueOf(unscaledAt(i), 2))
+        }
+        checkAnswer(df, expected)
+      }
+    }
+  }
 }
 
 class JobCommitFailureParquetOutputCommitter(outputPath: Path, context: TaskAttemptContext)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadStateTestAccess.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTestAccess.scala
similarity index 57%
rename from sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadStateTestAccess.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTestAccess.scala
index d718ed49d7103..e008a7b0322e7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadStateTestAccess.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTestAccess.scala
@@ -18,24 +18,37 @@
 package org.apache.spark.sql.execution.datasources.parquet
 
 import java.lang.reflect.{InvocationTargetException, Method}
+import java.time.ZoneId
 import java.util.PrimitiveIterator
 
 import org.apache.parquet.column.ColumnDescriptor
+import org.apache.parquet.schema.LogicalTypeAnnotation
 
 import org.apache.spark.sql.execution.vectorized.WritableColumnVector
 import org.apache.spark.util.SparkClassUtils
 
 /**
- * Reflective bridge to the package-private `ParquetReadState`. Under `spark-submit --jars`,
- * test and main classes load from different classloaders, blocking package-private access.
- * Reflection with `setAccessible` sidesteps the check without widening production visibility.
+ * Reflective bridge to package-private classes in
+ * `org.apache.spark.sql.execution.datasources.parquet`. Under `spark-submit --jars`, test
+ * and main classes load from different classloaders, blocking package-private access despite
+ * the matching package name. Reflection with `setAccessible(true)` sidesteps the check
+ * without widening production visibility.
+ *
+ * Currently bridges:
+ *   - `ParquetReadState` (constructor + `resetForNewBatch` + `resetForNewPage`)
+ *   - `VectorizedRleValuesReader.readBatch` (5-arg overload not exposed publicly)
+ *   - `ParquetVectorUpdaterFactory` (constructor)
+ *   - `VectorizedDeltaByteArrayReader` (no-arg constructor)
+ *   - `VectorizedDeltaLengthByteArrayReader` (no-arg constructor)
  */
-object ParquetReadStateTestAccess {
+object ParquetTestAccess {
+
+  // -------- ParquetReadState --------
 
   private val stateCls = SparkClassUtils.classForName[Any](
     "org.apache.spark.sql.execution.datasources.parquet.ParquetReadState")
 
-  private val ctor = {
+  private val stateCtor = {
     val c = stateCls.getDeclaredConstructor(
       classOf[ColumnDescriptor],
       java.lang.Boolean.TYPE,
@@ -71,7 +84,7 @@ object ParquetReadStateTestAccess {
       isRequired: Boolean,
       rowIndexes: PrimitiveIterator.OfLong = null): AnyRef = {
     try {
-      ctor.newInstance(
+      stateCtor.newInstance(
         descriptor,
         Boolean.box(isRequired),
         rowIndexes).asInstanceOf[AnyRef]
@@ -105,6 +118,63 @@ object ParquetReadStateTestAccess {
         reader, state, values, defLevels, valueReader, updater)
     } catch { case e: ReflectiveOperationException => throw rethrow(e) }
 
+  // -------- ParquetVectorUpdaterFactory --------
+
+  private val factoryCtor = {
+    val cls = SparkClassUtils.classForName[Any](
+      "org.apache.spark.sql.execution.datasources.parquet.ParquetVectorUpdaterFactory")
+    val c = cls.getDeclaredConstructor(
+      classOf[LogicalTypeAnnotation],
+      classOf[ZoneId],
+      classOf[String],
+      classOf[String],
+      classOf[String],
+      classOf[String])
+    c.setAccessible(true)
+    c
+  }
+
+  def newFactory(
+      logicalTypeAnnotation: LogicalTypeAnnotation,
+      convertTz: ZoneId,
+      datetimeRebaseMode: String,
+      datetimeRebaseTz: String,
+      int96RebaseMode: String,
+      int96RebaseTz: String): ParquetVectorUpdaterFactory = {
+    try {
+      factoryCtor.newInstance(
+        logicalTypeAnnotation, convertTz,
+        datetimeRebaseMode, datetimeRebaseTz,
+        int96RebaseMode, int96RebaseTz).asInstanceOf[ParquetVectorUpdaterFactory]
+    } catch {
+      case e: ReflectiveOperationException => throw rethrow(e)
+    }
+  }
+
+  // -------- VectorizedDeltaByteArrayReader / VectorizedDeltaLengthByteArrayReader --------
+
+  private val deltaByteArrayCtor = {
+    val c = classOf[VectorizedDeltaByteArrayReader].getDeclaredConstructor()
+    c.setAccessible(true)
+    c
+  }
+
+  private val deltaLengthByteArrayCtor = {
+    val c = classOf[VectorizedDeltaLengthByteArrayReader].getDeclaredConstructor()
+    c.setAccessible(true)
+    c
+  }
+
+  def newDeltaByteArrayReader(): VectorizedDeltaByteArrayReader =
+    try { deltaByteArrayCtor.newInstance() }
+    catch { case e: ReflectiveOperationException => throw rethrow(e) }
+
+  def newDeltaLengthByteArrayReader(): VectorizedDeltaLengthByteArrayReader =
+    try { deltaLengthByteArrayCtor.newInstance() }
+    catch { case e: ReflectiveOperationException => throw rethrow(e) }
+
+  // -------- shared helper --------
+
   private def rethrow(e: ReflectiveOperationException): RuntimeException = {
     val cause = e match {
       case ite: InvocationTargetException => ite.getCause
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala
new file mode 100644
index 0000000000000..a78593096d5c8
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala
@@ -0,0 +1,419 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import java.nio.{ByteBuffer, ByteOrder}
+import java.time.ZoneOffset
+
+import org.apache.parquet.bytes.ByteBufferInputStream
+import org.apache.parquet.column.ColumnDescriptor
+import org.apache.parquet.schema.{LogicalTypeAnnotation, Types}
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
+import org.apache.parquet.schema.Type.Repetition
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector}
+import org.apache.spark.sql.types._
+
+/**
+ * Low-level benchmark for `ParquetVectorUpdater` implementations in
+ * `ParquetVectorUpdaterFactory`. Measures the per-batch throughput of `readValues`,
+ * `readValue`, and `skipValues` for the family of Updaters Spark uses to populate a
+ * column vector from a `VectorizedValuesReader`.
+ *
+ * Coverage is intentionally broad - every Updater family is included even when no
+ * obvious optimization opportunity exists today, so the result file tracks the
+ * long-term baseline and future iteration does not have to add coverage first.
+ *
+ * Groups:
+ *   A. Identity Updaters -- direct copy from PLAIN values into target column type
+ *      (Boolean, Byte, Short, Integer, Long, Float, Double, Binary).
+ *   B. Type-converting Updaters -- per-row read+convert+write loops.
+ *      `IntegerToLong`, `IntegerToDouble`, `FloatToDouble`, `DateToTimestampNTZ`,
+ *      `DowncastLong`.
+ *   C. Rebase Updaters -- date/timestamp legacy-calendar rebase variants.
+ *      `IntegerWithRebase` (DATE), `LongWithRebase` (TIMESTAMP_MICROS),
+ *      `LongAsMicros`.
+ *   D. Unsigned Updaters -- `UnsignedInteger`, `UnsignedLong`.
+ *   E. Decimal Updaters -- `IntegerToDecimal`, `LongToDecimal`,
+ *      `BinaryToDecimal`, `FixedLenByteArrayToDecimal`.
+ *   F. FixedLenByteArray Updaters -- `FixedLenByteArray`, `FixedLenByteArrayAsInt`,
+ *      `FixedLenByteArrayAsLong`.
+ *
+ * Updater instances are obtained via `ParquetVectorUpdaterFactory.getUpdater`, the
+ * production entry point, so the benchmark exercises the full configuration matrix
+ * (logical-type annotation, rebase mode, timezone) the production decoder uses.
+ *
+ * Pre-warm: each case runs one full `readValues` against a fresh reader before
+ * `benchmark.addCase` to stabilize first-case JIT state.
+ *
+ * To run this benchmark:
+ * {{{
+ *   1. build/sbt "sql/Test/runMain <this class>"
+ *   2. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
+ *      Results in "benchmarks/ParquetVectorUpdaterBenchmark-results.txt".
+ *   3. GHA: `Run benchmarks` workflow, class = `*ParquetVectorUpdater*`.
+ * }}}
+ */
+object ParquetVectorUpdaterBenchmark extends BenchmarkBase {
+
+  private val NUM_ROWS = 1024 * 1024
+  private val NUM_ITERS = 5
+
+  // --------------- Helpers ---------------
+
+  private def descriptor(
+      name: PrimitiveTypeName,
+      logical: LogicalTypeAnnotation = null,
+      typeLength: Int = 0): ColumnDescriptor = {
+    var builder = Types.primitive(name, Repetition.OPTIONAL)
+    if (typeLength > 0) builder = builder.length(typeLength)
+    if (logical != null) builder = builder.as(logical)
+    new ColumnDescriptor(Array("col"), builder.named("col"), 0, 1)
+  }
+
+  // Production code (`VectorizedColumnReader`) constructs the Factory with the descriptor's
+  // own logical type annotation. The Factory's `isTimestampTypeMatched` and friends read
+  // that field rather than the descriptor, so the two must agree or the Factory throws.
+  private def factory(
+      desc: ColumnDescriptor,
+      datetimeRebaseMode: String = "CORRECTED",
+      int96RebaseMode: String = "CORRECTED"): ParquetVectorUpdaterFactory =
+    ParquetTestAccess.newFactory(
+      desc.getPrimitiveType.getLogicalTypeAnnotation,
+      ZoneOffset.UTC, datetimeRebaseMode, "UTC", int96RebaseMode, "UTC")
+
+  // ---- PLAIN-encoded value byte producers ----
+
+  private def plainIntBytes(count: Int)(f: Int => Int): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 4).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putInt(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainLongBytes(count: Int)(f: Int => Long): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 8).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putLong(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainFloatBytes(count: Int)(f: Int => Float): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 4).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putFloat(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainDoubleBytes(count: Int)(f: Int => Double): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 8).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putDouble(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainBooleanBytes(count: Int): Array[Byte] = {
+    val byteCount = (count + 7) / 8
+    val out = new Array[Byte](byteCount)
+    var i = 0
+    while (i < count) {
+      if ((i & 1) == 0) out(i / 8) = (out(i / 8) | (1 << (i % 8))).toByte
+      i += 1
+    }
+    out
+  }
+
+  /** Variable-length binary: 4-byte length prefix + payload, repeated. */
+  private def plainBinaryBytes(count: Int, payloadLen: Int): Array[Byte] = {
+    val recordLen = 4 + payloadLen
+    val buf = ByteBuffer.allocate(count * recordLen).order(ByteOrder.LITTLE_ENDIAN)
+    val payload = new Array[Byte](payloadLen)
+    var i = 0
+    while (i < count) {
+      buf.putInt(payloadLen)
+      buf.put(payload)
+      i += 1
+    }
+    buf.array()
+  }
+
+  /** Fixed-length byte array: just `count * len` bytes, all zero. */
+  private def plainFixedLenBytes(count: Int, len: Int): Array[Byte] =
+    new Array[Byte](count * len)
+
+  private def newPlainReader(bytes: Array[Byte]): VectorizedPlainValuesReader = {
+    val r = new VectorizedPlainValuesReader
+    r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+    r
+  }
+
+  // --------------- Per-case runners ---------------
+
+  /**
+   * Pre-warms then registers a benchmark case that reads `NUM_ROWS` values via
+   * `updater.readValues`. The Factory is built from the descriptor inside the helper
+   * so its `logicalTypeAnnotation` field always matches the descriptor (production
+   * `VectorizedColumnReader` does the same). The Updater is obtained fresh from the
+   * Factory inside the case body to mirror production (one Updater per batch).
+   * `vector.reset()` clears arrayData for variable-length types so byte storage does
+   * not accumulate.
+   */
+  private def addReadValuesCase(
+      benchmark: Benchmark,
+      label: String,
+      sparkType: DataType,
+      desc: ColumnDescriptor,
+      vector: WritableColumnVector,
+      bytes: Array[Byte],
+      datetimeRebaseMode: String = "CORRECTED",
+      int96RebaseMode: String = "CORRECTED"): Unit = {
+    val fac = factory(desc, datetimeRebaseMode, int96RebaseMode)
+
+    // Pre-warm so the call site is C2-compiled before benchmark.run() measures.
+    vector.reset()
+    val warmUpdater = fac.getUpdater(desc, sparkType)
+    warmUpdater.readValues(NUM_ROWS, 0, vector, newPlainReader(bytes))
+
+    benchmark.addCase(label) { _ =>
+      vector.reset()
+      val u = fac.getUpdater(desc, sparkType)
+      u.readValues(NUM_ROWS, 0, vector, newPlainReader(bytes))
+    }
+  }
+
+  // --------------- Group A: identity Updaters ---------------
+
+  private def runIdentityBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Identity Updaters", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val intVec = new OnHeapColumnVector(NUM_ROWS, IntegerType)
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+    val floatVec = new OnHeapColumnVector(NUM_ROWS, FloatType)
+    val doubleVec = new OnHeapColumnVector(NUM_ROWS, DoubleType)
+    val boolVec = new OnHeapColumnVector(NUM_ROWS, BooleanType)
+    val byteVec = new OnHeapColumnVector(NUM_ROWS, ByteType)
+    val shortVec = new OnHeapColumnVector(NUM_ROWS, ShortType)
+    val binaryVec = new OnHeapColumnVector(NUM_ROWS, BinaryType)
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+    val floatBytes = plainFloatBytes(NUM_ROWS)(_.toFloat)
+    val doubleBytes = plainDoubleBytes(NUM_ROWS)(_.toDouble)
+    val boolBytes = plainBooleanBytes(NUM_ROWS)
+    val binaryBytes = plainBinaryBytes(NUM_ROWS, payloadLen = 16)
+
+    addReadValuesCase(benchmark, "BooleanUpdater",
+      BooleanType, descriptor(PrimitiveTypeName.BOOLEAN), boolVec, boolBytes)
+    addReadValuesCase(benchmark, "ByteUpdater (INT32 -> Byte)",
+      ByteType, descriptor(PrimitiveTypeName.INT32), byteVec, intBytes)
+    addReadValuesCase(benchmark, "ShortUpdater (INT32 -> Short)",
+      ShortType, descriptor(PrimitiveTypeName.INT32), shortVec, intBytes)
+    addReadValuesCase(benchmark, "IntegerUpdater",
+      IntegerType, descriptor(PrimitiveTypeName.INT32), intVec, intBytes)
+    addReadValuesCase(benchmark, "LongUpdater",
+      LongType, descriptor(PrimitiveTypeName.INT64), longVec, longBytes)
+    addReadValuesCase(benchmark, "FloatUpdater",
+      FloatType, descriptor(PrimitiveTypeName.FLOAT), floatVec, floatBytes)
+    addReadValuesCase(benchmark, "DoubleUpdater",
+      DoubleType, descriptor(PrimitiveTypeName.DOUBLE), doubleVec, doubleBytes)
+    addReadValuesCase(benchmark, "BinaryUpdater",
+      BinaryType, descriptor(PrimitiveTypeName.BINARY), binaryVec, binaryBytes)
+
+    benchmark.run()
+  }
+
+  // --------------- Group B: type-converting Updaters ---------------
+
+  private def runTypeConvertingBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Type-converting Updaters", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+    val doubleVec = new OnHeapColumnVector(NUM_ROWS, DoubleType)
+    val shortDecVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(9, 2))
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+    val floatBytes = plainFloatBytes(NUM_ROWS)(_.toFloat)
+
+    addReadValuesCase(benchmark, "IntegerToLongUpdater",
+      LongType, descriptor(PrimitiveTypeName.INT32), longVec, intBytes)
+    addReadValuesCase(benchmark, "IntegerToDoubleUpdater",
+      DoubleType, descriptor(PrimitiveTypeName.INT32), doubleVec, intBytes)
+    addReadValuesCase(benchmark, "FloatToDoubleUpdater",
+      DoubleType, descriptor(PrimitiveTypeName.FLOAT), doubleVec, floatBytes)
+    addReadValuesCase(benchmark, "DateToTimestampNTZUpdater",
+      TimestampNTZType,
+      descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.dateType()),
+      longVec, intBytes)
+    // 32-bit-decimal target with INT64 source routes via canReadAsLongDecimal +
+    // is32BitDecimalType, both TRUE here, hence DowncastLongUpdater.
+    addReadValuesCase(benchmark, "DowncastLongUpdater (INT64 -> Decimal(9,2))",
+      DecimalType(9, 2),
+      descriptor(PrimitiveTypeName.INT64, LogicalTypeAnnotation.decimalType(2, 9)),
+      shortDecVec, longBytes)
+
+    benchmark.run()
+  }
+
+  // --------------- Group C: rebase Updaters ---------------
+
+  private def runRebaseBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Rebase Updaters", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val intVec = new OnHeapColumnVector(NUM_ROWS, IntegerType)
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+
+    // Post-1582 values measure the no-rebase fast path.
+    val intBytes = plainIntBytes(NUM_ROWS)(_ => 18000)
+    val longBytes = plainLongBytes(NUM_ROWS)(_ => 1577836800000000L)
+
+    addReadValuesCase(benchmark, "IntegerWithRebaseUpdater (DATE legacy)",
+      DateType,
+      descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.dateType()),
+      intVec, intBytes,
+      datetimeRebaseMode = "LEGACY")
+    addReadValuesCase(benchmark, "LongWithRebaseUpdater (TIMESTAMP_MICROS legacy)",
+      TimestampType,
+      descriptor(PrimitiveTypeName.INT64,
+        LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS)),
+      longVec, longBytes,
+      datetimeRebaseMode = "LEGACY")
+    addReadValuesCase(benchmark, "LongAsMicrosUpdater (TIMESTAMP_MILLIS)",
+      TimestampType,
+      descriptor(PrimitiveTypeName.INT64,
+        LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)),
+      longVec, longBytes)
+
+    benchmark.run()
+  }
+
+  // --------------- Group D: unsigned Updaters ---------------
+
+  private def runUnsignedBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Unsigned Updaters", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+    val decimalVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(20, 0))
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+
+    addReadValuesCase(benchmark, "UnsignedIntegerUpdater (UINT32 -> Long)",
+      LongType,
+      descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.intType(32, false)),
+      longVec, intBytes)
+    addReadValuesCase(benchmark, "UnsignedLongUpdater (UINT64 -> Decimal(20,0))",
+      DecimalType(20, 0),
+      descriptor(PrimitiveTypeName.INT64, LogicalTypeAnnotation.intType(64, false)),
+      decimalVec, longBytes)
+
+    benchmark.run()
+  }
+
+  // --------------- Group E: decimal Updaters ---------------
+
+  private def runDecimalBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Decimal Updaters", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val shortDecVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(9, 2))
+    val longDecVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(18, 4))
+    // FixedLenByteArrayToDecimal routes when:
+    //   1) scale mismatch (target=4 vs parquet=2) defeats canReadAsLong/Int/BinaryDecimal, and
+    //   2) target precision exceeds parquet precision by at least the scale increase, so
+    //      isDecimalTypeMatched succeeds and canReadAsDecimal is true.
+    // Hence target Decimal(18, 4) with parquet decimalType(scale=2, precision=16):
+    // precisionIncrease=2 >= scaleIncrease=2.
+    val flbaTargetVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(18, 4))
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+    val flbaBytes = plainFixedLenBytes(NUM_ROWS, len = 8)
+
+    addReadValuesCase(benchmark, "IntegerToDecimalUpdater",
+      DecimalType(9, 2),
+      descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.decimalType(2, 9)),
+      shortDecVec, intBytes)
+    addReadValuesCase(benchmark, "LongToDecimalUpdater",
+      DecimalType(18, 4),
+      descriptor(PrimitiveTypeName.INT64, LogicalTypeAnnotation.decimalType(4, 18)),
+      longDecVec, longBytes)
+    // BinaryToDecimalUpdater is intentionally not benchmarked. Its `readValue`
+    // implementation uses the target vector as scratch via `putByteArray`, which requires
+    // the vector's `arrayData` child. Targets routed to this Updater have precision <= 18
+    // (DecimalType not byte-array decimal), so the WritableColumnVector constructor does
+    // not allocate `arrayData` and the call NPEs. The path is exercised only when a
+    // BINARY-source column has decimal precision <= 18, which is uncommon enough that
+    // this latent issue has not been a blocker. Skipping until the Updater itself is
+    // fixed to use a separate scratch buffer.
+    addReadValuesCase(benchmark, "FixedLenByteArrayToDecimalUpdater",
+      DecimalType(18, 4),
+      descriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
+        LogicalTypeAnnotation.decimalType(2, 16), typeLength = 8),
+      flbaTargetVec, flbaBytes)
+
+    benchmark.run()
+  }
+
+  // --------------- Group F: FixedLenByteArray Updaters ---------------
+
+  private def runFixedLenByteArrayBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "FixedLenByteArray Updaters", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val binaryVec = new OnHeapColumnVector(NUM_ROWS, BinaryType)
+    val shortDecVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(9, 2))
+    val longDecVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(18, 4))
+
+    val flbaBytes16 = plainFixedLenBytes(NUM_ROWS, len = 16)
+    val flbaBytes4 = plainFixedLenBytes(NUM_ROWS, len = 4)
+    val flbaBytes8 = plainFixedLenBytes(NUM_ROWS, len = 8)
+
+    addReadValuesCase(benchmark, "FixedLenByteArrayUpdater (len=16 -> Binary)",
+      BinaryType,
+      descriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, typeLength = 16),
+      binaryVec, flbaBytes16)
+    addReadValuesCase(benchmark, "FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2))",
+      DecimalType(9, 2),
+      descriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
+        LogicalTypeAnnotation.decimalType(2, 9), typeLength = 4),
+      shortDecVec, flbaBytes4)
+    addReadValuesCase(benchmark, "FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4))",
+      DecimalType(18, 4),
+      descriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
+        LogicalTypeAnnotation.decimalType(4, 18), typeLength = 8),
+      longDecVec, flbaBytes8)
+
+    benchmark.run()
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    runBenchmark("Identity Updaters") { runIdentityBenchmark() }
+    runBenchmark("Type-converting Updaters") { runTypeConvertingBenchmark() }
+    runBenchmark("Rebase Updaters") { runRebaseBenchmark() }
+    runBenchmark("Unsigned Updaters") { runUnsignedBenchmark() }
+    runBenchmark("Decimal Updaters") { runDecimalBenchmark() }
+    runBenchmark("FixedLenByteArray Updaters") { runFixedLenByteArrayBenchmark() }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaReaderBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaReaderBenchmark.scala
new file mode 100644
index 0000000000000..40e08d9870c71
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaReaderBenchmark.scala
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import java.nio.ByteBuffer
+
+import scala.util.Random
+
+import org.apache.parquet.bytes.{ByteBufferInputStream, DirectByteBufferAllocator}
+import org.apache.parquet.column.values.delta.{DeltaBinaryPackingValuesWriterForInteger, DeltaBinaryPackingValuesWriterForLong}
+import org.apache.parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesWriter
+import org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter
+import org.apache.parquet.io.api.Binary
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector
+import org.apache.spark.sql.types.{BinaryType, ByteType, DecimalType, IntegerType, LongType, ShortType}
+
+/**
+ * Low-level benchmark for the three Parquet delta-encoding decoders:
+ *
+ *   - `VectorizedDeltaBinaryPackedReader` (DELTA_BINARY_PACKED) - default INT32/INT64
+ *     encoding for Parquet v2.
+ *   - `VectorizedDeltaByteArrayReader` (DELTA_BYTE_ARRAY) - prefix+suffix string encoding.
+ *   - `VectorizedDeltaLengthByteArrayReader` (DELTA_LENGTH_BYTE_ARRAY) - length-prefixed
+ *     binary encoding.
+ *
+ * Coverage is intentionally broad - all three readers and their primary read/skip paths
+ * are included so the benchmark suite catches regressions across the full delta-decode
+ * surface, not just paths with an active optimization candidate.
+ *
+ * Groups:
+ *   A. DELTA_BINARY_PACKED INT32 - readIntegers / skipIntegers across value distributions.
+ *   B. DELTA_BINARY_PACKED INT64 - readLongs / skipLongs across value distributions.
+ *   C. DELTA_BYTE_ARRAY - readBinary / skipBinary across prefix-overlap ratios.
+ *   D. DELTA_LENGTH_BYTE_ARRAY - readBinary / skipBinary across payload sizes.
+ *   E. Variant reads - byte/short/unsigned bulk variants of DELTA_BINARY_PACKED, single-
+ *      value reads on all three readers, and skipBytes / skipShorts.
+ *
+ * To run this benchmark:
+ * {{{
+ *   1. build/sbt "sql/Test/runMain <this class>"
+ *   2. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
+ *      Results in "benchmarks/VectorizedDeltaReaderBenchmark-results.txt".
+ *   3. GHA: `Run benchmarks` workflow, class = `*VectorizedDeltaReader*`.
+ * }}}
+ */
+object VectorizedDeltaReaderBenchmark extends BenchmarkBase {
+
+  private val NUM_ROWS = 1024 * 1024
+  private val NUM_ITERS = 5
+  private val BLOCK_SIZE = 128
+  private val MINI_BLOCK_NUM = 4
+  private val PAGE_SIZE = 64 * 1024
+
+  // --------------- Group A: DELTA_BINARY_PACKED INT32 ---------------
+
+  private def encodeDeltaInts(values: Array[Int]): Array[Byte] = {
+    val writer = new DeltaBinaryPackingValuesWriterForInteger(
+      BLOCK_SIZE, MINI_BLOCK_NUM, PAGE_SIZE, PAGE_SIZE, new DirectByteBufferAllocator)
+    var i = 0
+    while (i < values.length) { writer.writeInteger(values(i)); i += 1 }
+    val out = writer.getBytes.toByteArray
+    writer.close()
+    out
+  }
+
+  private def runDeltaIntBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "DELTA_BINARY_PACKED INT32", NUM_ROWS.toLong, NUM_ITERS, output = output)
+    val vec = new OnHeapColumnVector(NUM_ROWS, IntegerType)
+
+    val rng = new Random(42)
+    val distributions: Seq[(String, Array[Int])] = Seq(
+      "constant" -> Array.fill(NUM_ROWS)(0),
+      "monotonic" -> Array.tabulate(NUM_ROWS)(identity),
+      "small-delta random" -> Array.tabulate(NUM_ROWS)(_ => rng.nextInt(256)),
+      "wide random" -> Array.tabulate(NUM_ROWS)(_ => rng.nextInt())
+    )
+
+    distributions.foreach { case (tag, values) =>
+      val bytes = encodeDeltaInts(values)
+
+      // Pre-warm.
+      val warm = new VectorizedDeltaBinaryPackedReader
+      warm.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+      warm.readIntegers(NUM_ROWS, vec, 0)
+
+      benchmark.addCase(s"readIntegers, $tag") { _ =>
+        val r = new VectorizedDeltaBinaryPackedReader
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        r.readIntegers(NUM_ROWS, vec, 0)
+      }
+
+      benchmark.addCase(s"skipIntegers, $tag") { _ =>
+        val r = new VectorizedDeltaBinaryPackedReader
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        r.skipIntegers(NUM_ROWS)
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group B: DELTA_BINARY_PACKED INT64 ---------------
+
+  private def encodeDeltaLongs(values: Array[Long]): Array[Byte] = {
+    val writer = new DeltaBinaryPackingValuesWriterForLong(
+      BLOCK_SIZE, MINI_BLOCK_NUM, PAGE_SIZE, PAGE_SIZE, new DirectByteBufferAllocator)
+    var i = 0
+    while (i < values.length) { writer.writeLong(values(i)); i += 1 }
+    val out = writer.getBytes.toByteArray
+    writer.close()
+    out
+  }
+
+  private def runDeltaLongBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "DELTA_BINARY_PACKED INT64", NUM_ROWS.toLong, NUM_ITERS, output = output)
+    val vec = new OnHeapColumnVector(NUM_ROWS, LongType)
+
+    val rng = new Random(42)
+    val distributions: Seq[(String, Array[Long])] = Seq(
+      "constant" -> Array.fill(NUM_ROWS)(0L),
+      "monotonic" -> Array.tabulate(NUM_ROWS)(_.toLong),
+      "small-delta random" -> Array.tabulate(NUM_ROWS)(_ => rng.nextInt(256).toLong),
+      "wide random" -> Array.tabulate(NUM_ROWS)(_ => rng.nextLong())
+    )
+
+    distributions.foreach { case (tag, values) =>
+      val bytes = encodeDeltaLongs(values)
+
+      val warm = new VectorizedDeltaBinaryPackedReader
+      warm.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+      warm.readLongs(NUM_ROWS, vec, 0)
+
+      benchmark.addCase(s"readLongs, $tag") { _ =>
+        val r = new VectorizedDeltaBinaryPackedReader
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        r.readLongs(NUM_ROWS, vec, 0)
+      }
+
+      benchmark.addCase(s"skipLongs, $tag") { _ =>
+        val r = new VectorizedDeltaBinaryPackedReader
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        r.skipLongs(NUM_ROWS)
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group C: DELTA_BYTE_ARRAY ---------------
+
+  private def encodeDeltaByteArray(values: Array[Array[Byte]]): Array[Byte] = {
+    val writer = new DeltaByteArrayWriter(PAGE_SIZE, PAGE_SIZE, new DirectByteBufferAllocator)
+    var i = 0
+    while (i < values.length) { writer.writeBytes(Binary.fromConstantByteArray(values(i))); i += 1 }
+    val out = writer.getBytes.toByteArray
+    writer.close()
+    out
+  }
+
+  /**
+   * Generate string-like binaries with `prefixOverlap` chars shared with the previous value
+   * and a random `suffixLen` of new bytes appended. Produces realistic delta-encoder input.
+   */
+  private def generateDeltaByteArrayValues(
+      count: Int, prefixOverlap: Int, suffixLen: Int): Array[Array[Byte]] = {
+    val rng = new Random(42)
+    val values = new Array[Array[Byte]](count)
+    var prev: Array[Byte] = new Array[Byte](0)
+    var i = 0
+    while (i < count) {
+      val keep = math.min(prefixOverlap, prev.length)
+      val v = new Array[Byte](keep + suffixLen)
+      System.arraycopy(prev, 0, v, 0, keep)
+      var j = 0
+      while (j < suffixLen) { v(keep + j) = rng.nextInt().toByte; j += 1 }
+      values(i) = v
+      prev = v
+      i += 1
+    }
+    values
+  }
+
+  private def runDeltaByteArrayBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "DELTA_BYTE_ARRAY", NUM_ROWS.toLong, NUM_ITERS, output = output)
+    val vec = new OnHeapColumnVector(NUM_ROWS, BinaryType)
+
+    val shapes: Seq[(String, Int, Int)] = Seq(
+      ("no overlap, len=16", 0, 16),
+      ("half overlap, len=16", 8, 16),
+      ("full overlap, len=16", 16, 16),
+      ("half overlap, len=64", 32, 64)
+    )
+
+    shapes.foreach { case (tag, prefix, suffix) =>
+      val values = generateDeltaByteArrayValues(NUM_ROWS, prefix, suffix)
+      val bytes = encodeDeltaByteArray(values)
+
+      val warm = ParquetTestAccess.newDeltaByteArrayReader()
+      warm.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+      vec.reset()
+      warm.readBinary(NUM_ROWS, vec, 0)
+
+      benchmark.addCase(s"readBinary, $tag") { _ =>
+        val r = ParquetTestAccess.newDeltaByteArrayReader()
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        vec.reset() // clear arrayData so payloads do not accumulate across iterations
+        r.readBinary(NUM_ROWS, vec, 0)
+      }
+
+      benchmark.addCase(s"skipBinary, $tag") { _ =>
+        val r = ParquetTestAccess.newDeltaByteArrayReader()
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        r.skipBinary(NUM_ROWS)
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group D: DELTA_LENGTH_BYTE_ARRAY ---------------
+
+  private def encodeDeltaLengthByteArray(values: Array[Array[Byte]]): Array[Byte] = {
+    val writer = new DeltaLengthByteArrayValuesWriter(
+      PAGE_SIZE, PAGE_SIZE, new DirectByteBufferAllocator)
+    var i = 0
+    while (i < values.length) { writer.writeBytes(Binary.fromConstantByteArray(values(i))); i += 1 }
+    val out = writer.getBytes.toByteArray
+    writer.close()
+    out
+  }
+
+  private def runDeltaLengthByteArrayBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "DELTA_LENGTH_BYTE_ARRAY", NUM_ROWS.toLong, NUM_ITERS, output = output)
+    val vec = new OnHeapColumnVector(NUM_ROWS, BinaryType)
+    val rng = new Random(42)
+
+    val payloadSizes = Seq(8, 32, 128, 512)
+
+    payloadSizes.foreach { len =>
+      val values = Array.tabulate(NUM_ROWS) { _ =>
+        val v = new Array[Byte](len)
+        rng.nextBytes(v)
+        v
+      }
+      val bytes = encodeDeltaLengthByteArray(values)
+
+      val warm = ParquetTestAccess.newDeltaLengthByteArrayReader()
+      warm.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+      vec.reset()
+      warm.readBinary(NUM_ROWS, vec, 0)
+
+      benchmark.addCase(s"readBinary, payloadLen=$len") { _ =>
+        val r = ParquetTestAccess.newDeltaLengthByteArrayReader()
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        vec.reset()
+        r.readBinary(NUM_ROWS, vec, 0)
+      }
+
+      benchmark.addCase(s"skipBinary, payloadLen=$len") { _ =>
+        val r = ParquetTestAccess.newDeltaLengthByteArrayReader()
+        r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+        r.skipBinary(NUM_ROWS)
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group E: variant reads ---------------
+
+  private def runVariantReadsBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Variant reads", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val rng = new Random(42)
+    val intValues = Array.tabulate(NUM_ROWS)(_ => rng.nextInt())
+    val longValues = Array.tabulate(NUM_ROWS)(_ => rng.nextLong())
+    val intBytes = encodeDeltaInts(intValues)
+    val longBytes = encodeDeltaLongs(longValues)
+
+    val byteVec = new OnHeapColumnVector(NUM_ROWS, ByteType)
+    val shortVec = new OnHeapColumnVector(NUM_ROWS, ShortType)
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+    val unsignedLongVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(20, 0))
+
+    def newIntReader(): VectorizedDeltaBinaryPackedReader = {
+      val r = new VectorizedDeltaBinaryPackedReader
+      r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(intBytes)))
+      r
+    }
+    def newLongReader(): VectorizedDeltaBinaryPackedReader = {
+      val r = new VectorizedDeltaBinaryPackedReader
+      r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(longBytes)))
+      r
+    }
+
+    // Bulk byte/short downcasts and unsigned conversions of DELTA_BINARY_PACKED data.
+    benchmark.addCase("readBytes (INT32)") { _ => newIntReader().readBytes(NUM_ROWS, byteVec, 0) }
+    benchmark.addCase("readShorts (INT32)") { _ =>
+      newIntReader().readShorts(NUM_ROWS, shortVec, 0)
+    }
+    benchmark.addCase("readUnsignedIntegers (INT32 -> Long)") { _ =>
+      newIntReader().readUnsignedIntegers(NUM_ROWS, longVec, 0)
+    }
+    benchmark.addCase("readUnsignedLongs (INT64 -> Decimal(20,0))") { _ =>
+      newLongReader().readUnsignedLongs(NUM_ROWS, unsignedLongVec, 0)
+    }
+    benchmark.addCase("skipBytes") { _ => newIntReader().skipBytes(NUM_ROWS) }
+    benchmark.addCase("skipShorts") { _ => newIntReader().skipShorts(NUM_ROWS) }
+
+    // Per-call overhead of single-value reads.
+    benchmark.addCase("readByte (INT32 single-value)") { _ =>
+      val r = newIntReader()
+      var i = 0; while (i < NUM_ROWS) { r.readByte(); i += 1 }
+    }
+    benchmark.addCase("readShort (INT32 single-value)") { _ =>
+      val r = newIntReader()
+      var i = 0; while (i < NUM_ROWS) { r.readShort(); i += 1 }
+    }
+    benchmark.addCase("readInteger (INT32 single-value)") { _ =>
+      val r = newIntReader()
+      var i = 0; while (i < NUM_ROWS) { r.readInteger(); i += 1 }
+    }
+    benchmark.addCase("readLong (INT64 single-value)") { _ =>
+      val r = newLongReader()
+      var i = 0; while (i < NUM_ROWS) { r.readLong(); i += 1 }
+    }
+
+    // Single-value readBinary(len) on DeltaByteArrayReader. Each call returns the
+    // next decoded value reconstructed from prefix/suffix state.
+    val binValues = generateDeltaByteArrayValues(NUM_ROWS, prefixOverlap = 8, suffixLen = 16)
+    val binBytes = encodeDeltaByteArray(binValues)
+    val perRowLen = 8 + 16 // matches the values produced by generateDeltaByteArrayValues
+    benchmark.addCase("readBinary(len) (DELTA_BYTE_ARRAY single-value)") { _ =>
+      val r = ParquetTestAccess.newDeltaByteArrayReader()
+      r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(binBytes)))
+      var i = 0; while (i < NUM_ROWS) { r.readBinary(perRowLen); i += 1 }
+    }
+    benchmark.run()
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    runBenchmark("DELTA_BINARY_PACKED INT32") { runDeltaIntBenchmark() }
+    runBenchmark("DELTA_BINARY_PACKED INT64") { runDeltaLongBenchmark() }
+    runBenchmark("DELTA_BYTE_ARRAY") { runDeltaByteArrayBenchmark() }
+    runBenchmark("DELTA_LENGTH_BYTE_ARRAY") { runDeltaLengthByteArrayBenchmark() }
+    runBenchmark("Variant reads") { runVariantReadsBenchmark() }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReaderBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReaderBenchmark.scala
new file mode 100644
index 0000000000000..0b86656b828d6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReaderBenchmark.scala
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import java.nio.{ByteBuffer, ByteOrder}
+
+import org.apache.parquet.bytes.ByteBufferInputStream
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector
+import org.apache.spark.sql.types._
+
+/**
+ * Low-level benchmark for `VectorizedPlainValuesReader`. Measures every public
+ * read/skip method, including paths that are already memcpy-optimal, so the
+ * benchmark suite tracks the long-term performance baseline of the PLAIN decode
+ * surface and is ready for future iterative optimization without first having
+ * to add coverage.
+ *
+ * Groups:
+ *   A. Fixed-size bulk reads - readBooleans, readBytes, readShorts, readIntegers,
+ *      readLongs, readFloats, readDoubles. These are at memcpy speed today;
+ *      kept in the benchmark as regression tracking.
+ *   B. Conversion bulk reads - readUnsignedIntegers, readUnsignedLongs,
+ *      readIntegersWithRebase, readLongsWithRebase. Per-row loops with
+ *      conversion; potential P3-style optimization candidates.
+ *   C. Variable-length - readBinary(total, v, rowId), per-row slice + length
+ *      decode pattern.
+ *   D. Single-value reads - readBoolean, readByte, readShort, readInteger,
+ *      readLong, readFloat, readDouble looped NUM_ROWS times. Measures per-call
+ *      overhead.
+ *   E. Skip paths - skipBooleans, skipBytes, skipShorts, skipIntegers, skipLongs,
+ *      skipFloats, skipDoubles, skipBinary, skipFixedLenByteArray.
+ *
+ * To run this benchmark:
+ * {{{
+ *   1. build/sbt "sql/Test/runMain <this class>"
+ *   2. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
+ *      Results in "benchmarks/VectorizedPlainValuesReaderBenchmark-results.txt".
+ *   3. GHA: `Run benchmarks` workflow, class = `*VectorizedPlainValuesReader*`.
+ * }}}
+ */
+object VectorizedPlainValuesReaderBenchmark extends BenchmarkBase {
+
+  private val NUM_ROWS = 1024 * 1024
+  private val NUM_ITERS = 5
+
+  // Volatile sink to prevent JIT from eliminating skip-only benchmark bodies. Trivial
+  // skip implementations are just `in.skip(N)` (a position increment); without an
+  // observable side effect, escape analysis and dead-code elimination can collapse the
+  // entire `newReader(...).skipX(...)` body to nothing, producing meaningless Infinity
+  // rates. Each skip case reads one byte after the skip and accumulates into `sink`
+  // to anchor the work.
+  @volatile private var sink: Long = 0L
+
+  // Append 8 trailing 0x00 bytes so the post-skip anchoring `readLong()` always has
+  // data to consume even when the skip exhausts the original payload. Plain
+  // single-value reads internally pull 4-8 bytes (e.g., readByte reads via readInteger,
+  // readLong reads 8), so the pad must cover the largest such read.
+  private val SINK_PAD_BYTES = 8
+  private def withTailPad(bytes: Array[Byte]): Array[Byte] = {
+    val out = new Array[Byte](bytes.length + SINK_PAD_BYTES)
+    System.arraycopy(bytes, 0, out, 0, bytes.length)
+    out
+  }
+
+  // --------------- PLAIN-encoded byte producers ---------------
+
+  private def plainIntBytes(count: Int)(f: Int => Int): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 4).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putInt(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainLongBytes(count: Int)(f: Int => Long): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 8).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putLong(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainFloatBytes(count: Int)(f: Int => Float): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 4).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putFloat(f(i)); i += 1 }
+    buf.array()
+  }
+
+  private def plainDoubleBytes(count: Int)(f: Int => Double): Array[Byte] = {
+    val buf = ByteBuffer.allocate(count * 8).order(ByteOrder.LITTLE_ENDIAN)
+    var i = 0
+    while (i < count) { buf.putDouble(f(i)); i += 1 }
+    buf.array()
+  }
+
+  // PLAIN booleans are bit-packed: 8 booleans per byte.
+  private def plainBooleanBytes(count: Int): Array[Byte] = {
+    val byteCount = (count + 7) / 8
+    val out = new Array[Byte](byteCount)
+    var i = 0
+    while (i < count) {
+      if ((i & 1) == 0) out(i / 8) = (out(i / 8) | (1 << (i % 8))).toByte
+      i += 1
+    }
+    out
+  }
+
+  // 4-byte length prefix + payload, repeated.
+  private def plainBinaryBytes(count: Int, payloadLen: Int): Array[Byte] = {
+    val recordLen = 4 + payloadLen
+    val buf = ByteBuffer.allocate(count * recordLen).order(ByteOrder.LITTLE_ENDIAN)
+    val payload = new Array[Byte](payloadLen)
+    var i = 0
+    while (i < count) {
+      buf.putInt(payloadLen)
+      buf.put(payload)
+      i += 1
+    }
+    buf.array()
+  }
+
+  private def newReader(bytes: Array[Byte]): VectorizedPlainValuesReader = {
+    val r = new VectorizedPlainValuesReader
+    r.initFromPage(NUM_ROWS, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)))
+    r
+  }
+
+  /** Adds a case that runs `body` after pre-warming the body once. */
+  private def addCase(benchmark: Benchmark, label: String)(body: () => Unit): Unit = {
+    body()
+    benchmark.addCase(label) { _ => body() }
+  }
+
+  // --------------- Group A: fixed-size bulk reads ---------------
+
+  private def runBulkBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Fixed-size bulk reads", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val intVec = new OnHeapColumnVector(NUM_ROWS, IntegerType)
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+    val floatVec = new OnHeapColumnVector(NUM_ROWS, FloatType)
+    val doubleVec = new OnHeapColumnVector(NUM_ROWS, DoubleType)
+    val boolVec = new OnHeapColumnVector(NUM_ROWS, BooleanType)
+    val byteVec = new OnHeapColumnVector(NUM_ROWS, ByteType)
+    val shortVec = new OnHeapColumnVector(NUM_ROWS, ShortType)
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+    val floatBytes = plainFloatBytes(NUM_ROWS)(_.toFloat)
+    val doubleBytes = plainDoubleBytes(NUM_ROWS)(_.toDouble)
+    val boolBytes = plainBooleanBytes(NUM_ROWS)
+
+    addCase(benchmark, "readBooleans") { () =>
+      newReader(boolBytes).readBooleans(NUM_ROWS, boolVec, 0)
+    }
+    addCase(benchmark, "readBytes") { () =>
+      newReader(intBytes).readBytes(NUM_ROWS, byteVec, 0)
+    }
+    addCase(benchmark, "readShorts") { () =>
+      newReader(intBytes).readShorts(NUM_ROWS, shortVec, 0)
+    }
+    addCase(benchmark, "readIntegers") { () =>
+      newReader(intBytes).readIntegers(NUM_ROWS, intVec, 0)
+    }
+    addCase(benchmark, "readLongs") { () =>
+      newReader(longBytes).readLongs(NUM_ROWS, longVec, 0)
+    }
+    addCase(benchmark, "readFloats") { () =>
+      newReader(floatBytes).readFloats(NUM_ROWS, floatVec, 0)
+    }
+    addCase(benchmark, "readDoubles") { () =>
+      newReader(doubleBytes).readDoubles(NUM_ROWS, doubleVec, 0)
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group B: conversion bulk reads ---------------
+
+  private def runConversionBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Conversion bulk reads", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val longVec = new OnHeapColumnVector(NUM_ROWS, LongType)
+    val intVec = new OnHeapColumnVector(NUM_ROWS, IntegerType)
+    val tsVec = new OnHeapColumnVector(NUM_ROWS, TimestampType)
+    // readUnsignedLongs stores UINT64 values via putByteArray into arrayData(), so the
+    // target vector must be a byte-array decimal (precision > 18) for arrayData to be
+    // allocated. Decimal(20, 0) is the canonical UINT64 mapping in production.
+    val uint64DecVec = new OnHeapColumnVector(NUM_ROWS, DecimalType(20, 0))
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+
+    // UINT32 -> Long via readUnsignedIntegers (per-row loop in the reader).
+    addCase(benchmark, "readUnsignedIntegers") { () =>
+      newReader(intBytes).readUnsignedIntegers(NUM_ROWS, longVec, 0)
+    }
+
+    // UINT64 -> Decimal(20, 0) via readUnsignedLongs (per-row loop).
+    addCase(benchmark, "readUnsignedLongs") { () =>
+      uint64DecVec.reset() // clear arrayData so payloads do not accumulate across iterations
+      newReader(longBytes).readUnsignedLongs(NUM_ROWS, uint64DecVec, 0)
+    }
+
+    // DATE legacy rebase. Use values that don't trigger rebase to measure the no-rebase
+    // fast path; failIfRebase=false is the common production setting.
+    val safeDateBytes = plainIntBytes(NUM_ROWS)(_ => 18000)
+    addCase(benchmark, "readIntegersWithRebase, no rebase needed") { () =>
+      newReader(safeDateBytes).readIntegersWithRebase(NUM_ROWS, intVec, 0, false)
+    }
+
+    // TIMESTAMP_MICROS legacy rebase, no-rebase values.
+    val safeTsBytes = plainLongBytes(NUM_ROWS)(_ => 1577836800000000L)
+    addCase(benchmark, "readLongsWithRebase, no rebase needed") { () =>
+      newReader(safeTsBytes).readLongsWithRebase(NUM_ROWS, tsVec, 0, false, "UTC")
+    }
+
+    benchmark.run()
+  }
+
+  // --------------- Group C: variable-length reads ---------------
+
+  private def runVariableLengthBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Variable-length reads", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val binaryVec = new OnHeapColumnVector(NUM_ROWS, BinaryType)
+
+    Seq(8, 32, 128, 512).foreach { payloadLen =>
+      val bytes = plainBinaryBytes(NUM_ROWS, payloadLen)
+      addCase(benchmark, s"readBinary, payloadLen=$payloadLen") { () =>
+        binaryVec.reset() // clear arrayData so payloads do not accumulate across iterations
+        newReader(bytes).readBinary(NUM_ROWS, binaryVec, 0)
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group D: single-value reads ---------------
+
+  private def runSingleValueBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Single-value reads", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    val intBytes = plainIntBytes(NUM_ROWS)(i => i)
+    val longBytes = plainLongBytes(NUM_ROWS)(_.toLong)
+    val floatBytes = plainFloatBytes(NUM_ROWS)(_.toFloat)
+    val doubleBytes = plainDoubleBytes(NUM_ROWS)(_.toDouble)
+    val boolBytes = plainBooleanBytes(NUM_ROWS)
+
+    addCase(benchmark, "readBoolean") { () =>
+      val r = newReader(boolBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readBoolean(); j += 1 }
+    }
+    addCase(benchmark, "readByte") { () =>
+      val r = newReader(intBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readByte(); j += 1 }
+    }
+    addCase(benchmark, "readShort") { () =>
+      val r = newReader(intBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readShort(); j += 1 }
+    }
+    addCase(benchmark, "readInteger") { () =>
+      val r = newReader(intBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readInteger(); j += 1 }
+    }
+    addCase(benchmark, "readLong") { () =>
+      val r = newReader(longBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readLong(); j += 1 }
+    }
+    addCase(benchmark, "readFloat") { () =>
+      val r = newReader(floatBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readFloat(); j += 1 }
+    }
+    addCase(benchmark, "readDouble") { () =>
+      val r = newReader(doubleBytes)
+      var j = 0
+      while (j < NUM_ROWS) { r.readDouble(); j += 1 }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group E: skip paths ---------------
+
+  private def runSkipBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Skip", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    // Pad each input by 1 byte so the post-skip readByte() anchors a side effect
+    // and prevents JIT from eliding the skip body.
+    val intBytes = withTailPad(plainIntBytes(NUM_ROWS)(i => i))
+    val longBytes = withTailPad(plainLongBytes(NUM_ROWS)(_.toLong))
+    val floatBytes = withTailPad(plainFloatBytes(NUM_ROWS)(_.toFloat))
+    val doubleBytes = withTailPad(plainDoubleBytes(NUM_ROWS)(_.toDouble))
+    val boolBytes = withTailPad(plainBooleanBytes(NUM_ROWS))
+
+    Seq(8, 32, 128, 512).foreach { payloadLen =>
+      val bytes = withTailPad(plainBinaryBytes(NUM_ROWS, payloadLen))
+      addCase(benchmark, s"skipBinary, payloadLen=$payloadLen") { () =>
+        val r = newReader(bytes)
+        r.skipBinary(NUM_ROWS)
+        sink += r.readLong()
+      }
+    }
+
+    Seq(4, 16, 64).foreach { len =>
+      val bytes = withTailPad(new Array[Byte](NUM_ROWS * len))
+      addCase(benchmark, s"skipFixedLenByteArray, len=$len") { () =>
+        val r = newReader(bytes)
+        r.skipFixedLenByteArray(NUM_ROWS, len)
+        sink += r.readLong()
+      }
+    }
+
+    addCase(benchmark, "skipBooleans") { () =>
+      val r = newReader(boolBytes)
+      r.skipBooleans(NUM_ROWS)
+      sink += r.readLong()
+    }
+    addCase(benchmark, "skipBytes") { () =>
+      val r = newReader(intBytes)
+      r.skipBytes(NUM_ROWS)
+      sink += r.readLong()
+    }
+    addCase(benchmark, "skipShorts") { () =>
+      val r = newReader(intBytes)
+      r.skipShorts(NUM_ROWS)
+      sink += r.readLong()
+    }
+    addCase(benchmark, "skipIntegers") { () =>
+      val r = newReader(intBytes)
+      r.skipIntegers(NUM_ROWS)
+      sink += r.readLong()
+    }
+    addCase(benchmark, "skipLongs") { () =>
+      val r = newReader(longBytes)
+      r.skipLongs(NUM_ROWS)
+      sink += r.readLong()
+    }
+    addCase(benchmark, "skipFloats") { () =>
+      val r = newReader(floatBytes)
+      r.skipFloats(NUM_ROWS)
+      sink += r.readLong()
+    }
+    addCase(benchmark, "skipDoubles") { () =>
+      val r = newReader(doubleBytes)
+      r.skipDoubles(NUM_ROWS)
+      sink += r.readLong()
+    }
+
+    benchmark.run()
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    runBenchmark("Fixed-size bulk reads") { runBulkBenchmark() }
+    runBenchmark("Conversion bulk reads") { runConversionBenchmark() }
+    runBenchmark("Variable-length reads") { runVariableLengthBenchmark() }
+    runBenchmark("Single-value reads") { runSingleValueBenchmark() }
+    runBenchmark("Skip") { runSkipBenchmark() }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala
index 008b18cdcab16..6f99633a6a354 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderBenchmark.scala
@@ -18,6 +18,8 @@
 package org.apache.spark.sql.execution.datasources.parquet
 
 import java.nio.ByteBuffer
+import java.util.PrimitiveIterator
+import java.util.stream.LongStream
 
 import scala.util.Random
 
@@ -40,6 +42,20 @@ import org.apache.spark.sql.types.{BooleanType, IntegerType}
  *      path used when the caller needs materialized definition levels (e.g., nested columns).
  *   D. readBatch nullable without def-level materialization -- the `readBatchInternal` path used
  *      for flat nullable columns where only null/non-null disposition matters.
+ *   E. readBatch with row-index filtering -- exercises the with-filter code path through
+ *      `readBatchInternal{WithDefLevels}`'s range checks when Parquet column-index filtering is
+ *      active. Sweep over two filter shapes (single contiguous range, alternating windows) and
+ *      the same null ratios as C/D.
+ *   F. Single-value reads -- per-call overhead of `readBoolean`, `readInteger`,
+ *      `readValueDictionaryId` looped NUM_ROWS times. Establishes baseline against the bulk
+ *      Group A/B paths.
+ *   G. skipBooleans / skipIntegers -- forward-skip path used by row-index filtering and
+ *      pushdown. RLE + PACKED across the same parameter sweeps as A/B.
+ *
+ * Not yet covered (deferred): `readBatchRepeated` and `readIntegersRepeated` for nested
+ * columns require setting up a `ParquetReadState` with `maxRepetitionLevel > 0`, a separate
+ * def-levels reader, and encoded rep-level streams; better added together with the matching
+ * suite-level coverage in a focused follow-up.
  *
  * Cold = fresh reader per iteration (exercises cold `currentBuffer` growth).
  * Reused = reader pre-warmed outside the timed region; inside is only `initFromPage` + read.
@@ -68,10 +84,24 @@ object VectorizedRleValuesReaderBenchmark extends BenchmarkBase {
   // --------------- ReadState helpers (delegate to shared reflection bridge) ---------------
 
   private def newReadState(maxDef: Int, valuesInPage: Int): AnyRef = {
-    val state = ParquetReadStateTestAccess.newState(
+    val state = ParquetTestAccess.newState(
       intColumnDescriptor(maxDef), maxDef == 0)
-    ParquetReadStateTestAccess.resetForNewBatch(state, BATCH_SIZE)
-    ParquetReadStateTestAccess.resetForNewPage(state, valuesInPage, 0L)
+    ParquetTestAccess.resetForNewBatch(state, BATCH_SIZE)
+    ParquetTestAccess.resetForNewPage(state, valuesInPage, 0L)
+    state
+  }
+
+  // State variant with a fresh row-index iterator. `rowRanges` inside ParquetReadState is
+  // iterated forward and never reset, so Group E measurements must construct a new state per
+  // benchmark iteration. The iterator is built from `indexFactory` on each call.
+  private def newReadStateWithRowIndexes(
+      maxDef: Int,
+      valuesInPage: Int,
+      indexFactory: () => PrimitiveIterator.OfLong): AnyRef = {
+    val state = ParquetTestAccess.newState(
+      intColumnDescriptor(maxDef), maxDef == 0, indexFactory())
+    ParquetTestAccess.resetForNewBatch(state, BATCH_SIZE)
+    ParquetTestAccess.resetForNewPage(state, valuesInPage, 0L)
     state
   }
 
@@ -258,7 +288,7 @@ object VectorizedRleValuesReaderBenchmark extends BenchmarkBase {
           benchmark.addCase(
               f"nullRatio=${nullRatio}%.1f, $clusterTag") { _ =>
             reader.initFromPage(NUM_ROWS, toInputStream(bytes))
-            ParquetReadStateTestAccess.resetForNewPage(
+            ParquetTestAccess.resetForNewPage(
               state, NUM_ROWS, 0L)
             runBatches(reader, state, values, defLevelsVec, factory())
           }
@@ -277,13 +307,188 @@ object VectorizedRleValuesReaderBenchmark extends BenchmarkBase {
     var produced = 0
     while (produced < NUM_ROWS) {
       val toRead = math.min(BATCH_SIZE, NUM_ROWS - produced)
-      ParquetReadStateTestAccess.resetForNewBatch(state, toRead)
-      ParquetReadStateTestAccess.readBatch(
+      ParquetTestAccess.resetForNewBatch(state, toRead)
+      ParquetTestAccess.readBatch(
         reader, state, values, defLevelsVec, valueReader, integerUpdater)
       produced += toRead
     }
   }
 
+  // --------------- Group E: readBatch with row-index filtering ---------------
+
+  /** Kept row indices: contiguous range `[keptStart, keptStart + keptCount)`. */
+  private def contiguousIndexFactory(
+      keptStart: Long,
+      keptCount: Long): () => PrimitiveIterator.OfLong =
+    () => LongStream.range(keptStart, keptStart + keptCount).iterator()
+
+  /** Kept row indices: every other `windowSize`-row window starting at index 0. */
+  private def alternatingWindowsIndexFactory(
+      totalRows: Long,
+      windowSize: Long): () => PrimitiveIterator.OfLong = { () =>
+    new PrimitiveIterator.OfLong {
+      private var i = 0L
+      override def hasNext: Boolean = i < totalRows
+      override def nextLong(): Long = {
+        // Advance through only the "kept" windows. Window k in [k*windowSize, (k+1)*windowSize)
+        // is kept when k is even.
+        while ((i / windowSize) % 2 != 0 && i < totalRows) i += 1
+        if (i >= totalRows) throw new NoSuchElementException
+        val out = i
+        i += 1
+        out
+      }
+    }
+  }
+
+  private def runRowRangeFilterBenchmark(
+      label: String,
+      buildValueReader: Int => ValueReaderFactory,
+      materializeDefLevels: Boolean): Unit = {
+    val benchmark = new Benchmark(
+      label, NUM_ROWS.toLong, NUM_ITERS, output = output)
+    val values = new OnHeapColumnVector(NUM_ROWS, IntegerType)
+    val defLevelsVec: WritableColumnVector =
+      if (materializeDefLevels) new OnHeapColumnVector(NUM_ROWS, IntegerType)
+      else null
+
+    // One contiguous range covering the middle 50% of rows; and alternating 1000-row windows
+    // (50% kept, but with many skip/read transitions inside each batch).
+    val filterShapes: Seq[(String, () => PrimitiveIterator.OfLong)] = Seq(
+      "contiguous 50%" -> contiguousIndexFactory(NUM_ROWS / 4L, NUM_ROWS / 2L),
+      "alt 1000-row windows" -> alternatingWindowsIndexFactory(NUM_ROWS.toLong, 1000L)
+    )
+
+    val nullRatios = Seq(0.0, 0.3, 0.9)
+
+    filterShapes.foreach { case (shapeTag, indexFactory) =>
+      nullRatios.foreach { nullRatio =>
+        val defLevels =
+          packedFriendlyDefLevels(NUM_ROWS, nullRatio, clustered = false)
+        val nonNullCount = defLevels.count(_ == 1)
+        val bytes = encodeRle(defLevels, bitWidth = 1)
+        val factory = buildValueReader(nonNullCount)
+
+        // Pre-warm the full pipeline with a fresh state so JIT has seen the with-filter path.
+        val reader = new VectorizedRleValuesReader(1, false)
+        reader.initFromPage(NUM_ROWS, toInputStream(bytes))
+        val warmState =
+          newReadStateWithRowIndexes(maxDef = 1, valuesInPage = NUM_ROWS, indexFactory)
+        runBatches(reader, warmState, values, defLevelsVec, factory())
+
+        benchmark.addCase(
+            f"nullRatio=${nullRatio}%.1f, $shapeTag") { _ =>
+          reader.initFromPage(NUM_ROWS, toInputStream(bytes))
+          // `rowRanges` in ParquetReadState is iterated forward and not reset by
+          // resetForNewPage/Batch, so we must construct a fresh state per measurement
+          // iteration. Iterator construction cost is small compared to decoding NUM_ROWS.
+          val state =
+            newReadStateWithRowIndexes(maxDef = 1, valuesInPage = NUM_ROWS, indexFactory)
+          runBatches(reader, state, values, defLevelsVec, factory())
+        }
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group F: single-value reads ---------------
+
+  private def runSingleValueBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Single-value reads", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    // Boolean - bitWidth=1, alternating values (forces PACKED).
+    val boolBytes = encodeRle(packedFriendlyBooleans(NUM_ROWS, 0.5), bitWidth = 1)
+    val boolWarm = new VectorizedRleValuesReader(1, false)
+    boolWarm.initFromPage(NUM_ROWS, toInputStream(boolBytes))
+    var i = 0
+    while (i < NUM_ROWS) { boolWarm.readBoolean(); i += 1 }
+
+    benchmark.addCase("readBoolean") { _ =>
+      val r = new VectorizedRleValuesReader(1, false)
+      r.initFromPage(NUM_ROWS, toInputStream(boolBytes))
+      var j = 0
+      while (j < NUM_ROWS) { r.readBoolean(); j += 1 }
+    }
+
+    // readInteger / readValueDictionaryId across bitWidths. Use random PACKED data so each
+    // call reads a fresh value (RLE-only data would short-circuit too aggressively).
+    Seq(4, 8, 12, 20).foreach { bitWidth =>
+      val bytes = encodeRle(packedFriendlyDictIds(NUM_ROWS, bitWidth), bitWidth)
+
+      val warmInt = new VectorizedRleValuesReader(bitWidth, false)
+      warmInt.initFromPage(NUM_ROWS, toInputStream(bytes))
+      var k = 0
+      while (k < NUM_ROWS) { warmInt.readInteger(); k += 1 }
+
+      benchmark.addCase(s"readInteger, bitWidth=$bitWidth") { _ =>
+        val r = new VectorizedRleValuesReader(bitWidth, false)
+        r.initFromPage(NUM_ROWS, toInputStream(bytes))
+        var j = 0
+        while (j < NUM_ROWS) { r.readInteger(); j += 1 }
+      }
+
+      benchmark.addCase(s"readValueDictionaryId, bitWidth=$bitWidth") { _ =>
+        val r = new VectorizedRleValuesReader(bitWidth, false)
+        r.initFromPage(NUM_ROWS, toInputStream(bytes))
+        var j = 0
+        while (j < NUM_ROWS) { r.readValueDictionaryId(); j += 1 }
+      }
+    }
+    benchmark.run()
+  }
+
+  // --------------- Group G: skip paths ---------------
+
+  private def runSkipBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      "Skip", NUM_ROWS.toLong, NUM_ITERS, output = output)
+
+    // skipBooleans across the same true-ratio sweep as Group A.
+    Seq(0.0, 0.5, 1.0).foreach { trueRatio =>
+      val bytes = encodeRle(
+        packedFriendlyBooleans(NUM_ROWS, trueRatio), bitWidth = 1)
+
+      val warm = new VectorizedRleValuesReader(1, false)
+      warm.initFromPage(NUM_ROWS, toInputStream(bytes))
+      warm.skipBooleans(NUM_ROWS)
+
+      benchmark.addCase(f"skipBooleans, trueRatio=${trueRatio}%.1f") { _ =>
+        val r = new VectorizedRleValuesReader(1, false)
+        r.initFromPage(NUM_ROWS, toInputStream(bytes))
+        r.skipBooleans(NUM_ROWS)
+      }
+    }
+
+    // skipIntegers across the same bitWidth sweep as Group B; PACKED + RLE shapes.
+    Seq(4, 8, 12, 20).foreach { bitWidth =>
+      val packedBytes = encodeRle(
+        packedFriendlyDictIds(NUM_ROWS, bitWidth), bitWidth)
+      val rleBytes = encodeRle(Array.fill(NUM_ROWS)(0), bitWidth)
+
+      val warmPacked = new VectorizedRleValuesReader(bitWidth, false)
+      warmPacked.initFromPage(NUM_ROWS, toInputStream(packedBytes))
+      warmPacked.skipIntegers(NUM_ROWS)
+
+      benchmark.addCase(s"skipIntegers PACKED, bitWidth=$bitWidth") { _ =>
+        val r = new VectorizedRleValuesReader(bitWidth, false)
+        r.initFromPage(NUM_ROWS, toInputStream(packedBytes))
+        r.skipIntegers(NUM_ROWS)
+      }
+
+      val warmRle = new VectorizedRleValuesReader(bitWidth, false)
+      warmRle.initFromPage(NUM_ROWS, toInputStream(rleBytes))
+      warmRle.skipIntegers(NUM_ROWS)
+
+      benchmark.addCase(s"skipIntegers RLE, bitWidth=$bitWidth") { _ =>
+        val r = new VectorizedRleValuesReader(bitWidth, false)
+        r.initFromPage(NUM_ROWS, toInputStream(rleBytes))
+        r.skipIntegers(NUM_ROWS)
+      }
+    }
+    benchmark.run()
+  }
+
   override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
     runBenchmark("Boolean decode") {
       runBooleanBenchmark()
@@ -303,5 +508,23 @@ object VectorizedRleValuesReaderBenchmark extends BenchmarkBase {
         plainIntFactory,
         materializeDefLevels = false)
     }
+    runBenchmark("Nullable batch decode with row-index filtering (with def-levels)") {
+      runRowRangeFilterBenchmark(
+        "Nullable batch with def-levels, row-index filtered",
+        plainIntFactory,
+        materializeDefLevels = true)
+    }
+    runBenchmark("Nullable batch decode with row-index filtering (without def-levels)") {
+      runRowRangeFilterBenchmark(
+        "Nullable batch without def-levels, row-index filtered",
+        plainIntFactory,
+        materializeDefLevels = false)
+    }
+    runBenchmark("Single-value reads") {
+      runSingleValueBenchmark()
+    }
+    runBenchmark("Skip") {
+      runSkipBenchmark()
+    }
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderSuite.scala
index 68820fc82e9df..4ee17fb19c92b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReaderSuite.scala
@@ -158,8 +158,8 @@ private object VectorizedRleValuesReaderSuite {
     val valueReader = new VectorizedPlainValuesReader
     valueReader.initFromPage(
       nonNullCount, ByteBufferInputStream.wrap(ByteBuffer.wrap(plainBytes)))
-    val state = ParquetReadStateTestAccess.newState(intColumnDescriptor(maxDef), maxDef == 0)
-    ParquetReadStateTestAccess.resetForNewPage(state, n, 0L)
+    val state = ParquetTestAccess.newState(intColumnDescriptor(maxDef), maxDef == 0)
+    ParquetTestAccess.resetForNewPage(state, n, 0L)
 
     var produced = 0
     var expectedValueIdx = 0
@@ -167,9 +167,9 @@ private object VectorizedRleValuesReaderSuite {
       val toRead = math.min(batchSize, n - produced)
       val values = new OnHeapColumnVector(toRead, IntegerType)
       val defLevelsVec = new OnHeapColumnVector(toRead, IntegerType)
-      ParquetReadStateTestAccess.resetForNewBatch(state, toRead)
+      ParquetTestAccess.resetForNewBatch(state, toRead)
       val defLevelsArg: WritableColumnVector = if (withDefLevels) defLevelsVec else null
-      ParquetReadStateTestAccess.readBatch(
+      ParquetTestAccess.readBatch(
         reader, state, values, defLevelsArg, valueReader, integerUpdater)
 
       var expectedNullsInBatch = 0
@@ -223,14 +223,14 @@ private object VectorizedRleValuesReaderSuite {
     val valueReader = new VectorizedPlainValuesReader
     valueReader.initFromPage(
       nonNullCount, ByteBufferInputStream.wrap(ByteBuffer.wrap(plainBytes)))
-    val state = ParquetReadStateTestAccess.newState(
+    val state = ParquetTestAccess.newState(
       intColumnDescriptor(maxDef), maxDef == 0, longIterator(includedPositions))
-    ParquetReadStateTestAccess.resetForNewPage(state, n, 0L)
+    ParquetTestAccess.resetForNewPage(state, n, 0L)
 
     val size = includedPositions.length
     val values = new OnHeapColumnVector(size, IntegerType)
-    ParquetReadStateTestAccess.resetForNewBatch(state, size)
-    ParquetReadStateTestAccess.readBatch(reader, state, values, null, valueReader, integerUpdater)
+    ParquetTestAccess.resetForNewBatch(state, size)
+    ParquetTestAccess.readBatch(reader, state, values, null, valueReader, integerUpdater)
 
     val prefixNonNulls = defLevels.scanLeft(0) { (c, d) =>
       c + (if (d == maxDef) 1 else 0)
@@ -262,7 +262,7 @@ private object VectorizedRleValuesReaderSuite {
     val bitWidth = if (maxDef == 0) 0 else 32 - Integer.numberOfLeadingZeros(maxDef)
     val reader = new VectorizedRleValuesReader(bitWidth, false)
     val state =
-      ParquetReadStateTestAccess.newState(intColumnDescriptor(maxDef), maxDef == 0)
+      ParquetTestAccess.newState(intColumnDescriptor(maxDef), maxDef == 0)
 
     var pageFirstRow = 0L
     pages.foreach { pageDefLevels =>
@@ -275,15 +275,15 @@ private object VectorizedRleValuesReaderSuite {
       val valueReader = new VectorizedPlainValuesReader
       valueReader.initFromPage(
         nonNullCount, ByteBufferInputStream.wrap(ByteBuffer.wrap(plainBytes)))
-      ParquetReadStateTestAccess.resetForNewPage(state, pageN, pageFirstRow)
+      ParquetTestAccess.resetForNewPage(state, pageN, pageFirstRow)
 
       var produced = 0
       var expectedValueIdx = 0
       while (produced < pageN) {
         val toRead = math.min(batchSize, pageN - produced)
         val values = new OnHeapColumnVector(toRead, IntegerType)
-        ParquetReadStateTestAccess.resetForNewBatch(state, toRead)
-        ParquetReadStateTestAccess.readBatch(
+        ParquetTestAccess.resetForNewBatch(state, toRead)
+        ParquetTestAccess.readBatch(
           reader, state, values, null, valueReader, integerUpdater)
 
         var i = 0
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExecSuite.scala
index 35b3a32e33d82..5d2adeb0c00af 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/GroupPartitionsExecSuite.scala
@@ -146,7 +146,7 @@ class GroupPartitionsExecSuite extends SharedSparkSession {
   test("SPARK-55715: sorted merge config enabled but child not SafeForKWayMerge falls back " +
       "to key-expression ordering") {
     // DummySparkPlan does not extend SafeForKWayMerge, so childIsSafeForKWayMerge = false and
-    // canUseSortedMerge = false even when the preserve-ordering config is on. outputOrdering must
+    // canUseSortedMerge = false even with enableSortedMerge = true. outputOrdering must
     // therefore fall back to key-expression filtering (not return the full child ordering).
     val partitionKeys = Seq(row(1), row(2), row(1))
     val childOrdering = Seq(SortOrder(exprA, Ascending), SortOrder(exprC, Ascending))
@@ -159,19 +159,19 @@ class GroupPartitionsExecSuite extends SharedSparkSession {
     withSQLConf(
         SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true",
         SQLConf.V2_BUCKETING_PRESERVE_KEY_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
-      // Even though preserve-ordering is enabled, the child is not safe for k-way merge,
+      // Even though enableSortedMerge = true, the child is not safe for k-way merge,
       // so only key-expression orders survive (non-key exprC is dropped).
-      val ordering = GroupPartitionsExec(child).outputOrdering
+      val ordering = GroupPartitionsExec(child, enableSortedMerge = true).outputOrdering
       assert(ordering.length === 1)
       assert(ordering.head.child === exprA)
     }
   }
 
-  test("SPARK-55715: coalescing with sorted merge config enabled returns full child ordering") {
-    // Key 1 appears on partitions 0 and 2, causing coalescing.  The child is a LeafExecNode
-    // so childIsSafeForKWayMerge = true.  With the preserve-ordering config enabled, case 2
-    // of outputOrdering kicks in and the full child ordering (including the non-key exprC) must
-    // be returned, not just the subset of key-expression orders.
+  test("SPARK-55715: coalescing with enableSortedMerge = true returns full child ordering") {
+    // Key 1 appears on partitions 0 and 2, causing coalescing. The child is a LeafExecNode so
+    // childIsSafeForKWayMerge = true. With enableSortedMerge = true and the config enabled,
+    // canUseSortedMerge = true and the full child ordering (including the non-key exprC) must be
+    // returned, not just the subset of key-expression orders.
     val partitionKeys = Seq(row(1), row(2), row(1))
     val childOrdering = Seq(SortOrder(exprA, Ascending), SortOrder(exprC, Ascending))
     val child = DummyLeafSparkPlan(
@@ -181,19 +181,77 @@ class GroupPartitionsExecSuite extends SharedSparkSession {
     assert(!GroupPartitionsExec(child).groupedPartitions.forall(_._2.size <= 1),
       "expected coalescing")
     withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
-      // Config enabled: k-way merge preserves full ordering including non-key exprC.
-      assert(GroupPartitionsExec(child).outputOrdering === childOrdering)
+      assert(GroupPartitionsExec(child).outputOrdering !== childOrdering,
+        "config alone should not enable k-way merge; enableSortedMerge must be set by planner")
+      assert(GroupPartitionsExec(child, enableSortedMerge = true).outputOrdering === childOrdering)
     }
     withSQLConf(
         SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "false",
         SQLConf.V2_BUCKETING_PRESERVE_KEY_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
       // Sorted-merge config disabled, key-ordering config enabled: only key-expression orders
       // survive simple concatenation (non-key exprC is dropped).
-      val ordering = GroupPartitionsExec(child).outputOrdering
+      val ordering = GroupPartitionsExec(child, enableSortedMerge = true).outputOrdering
       assert(ordering.length === 1)
       assert(ordering.head.child === exprA)
     }
   }
+
+  test("SPARK-56549: tryEnableSortedMerge returns Some when conditions are met") {
+    val partitionKeys = Seq(row(1), row(2), row(1))
+    val childOrdering = Seq(SortOrder(exprA, Ascending), SortOrder(exprC, Ascending))
+    val child = DummyLeafSparkPlan(
+      outputPartitioning = KeyedPartitioning(Seq(exprA), partitionKeys),
+      outputOrdering = childOrdering)
+    val gpe = GroupPartitionsExec(child)
+
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      val result = gpe.tryEnableSortedMerge()
+      assert(result.isDefined)
+      assert(result.get.enableSortedMerge)
+      assert(result.get.outputOrdering === childOrdering)
+    }
+  }
+
+  test("SPARK-56549: tryEnableSortedMerge returns None when config is disabled") {
+    val partitionKeys = Seq(row(1), row(2), row(1))
+    val childOrdering = Seq(SortOrder(exprA, Ascending))
+    val child = DummyLeafSparkPlan(
+      outputPartitioning = KeyedPartitioning(Seq(exprA), partitionKeys),
+      outputOrdering = childOrdering)
+    val gpe = GroupPartitionsExec(child)
+
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "false") {
+      assert(gpe.tryEnableSortedMerge().isEmpty)
+    }
+  }
+
+  test("SPARK-56549: tryEnableSortedMerge returns None when child is not SafeForKWayMerge") {
+    val partitionKeys = Seq(row(1), row(2), row(1))
+    val childOrdering = Seq(SortOrder(exprA, Ascending))
+    // DummySparkPlan does not extend SafeForKWayMerge
+    val child = DummySparkPlan(
+      outputPartitioning = KeyedPartitioning(Seq(exprA), partitionKeys),
+      outputOrdering = childOrdering)
+    val gpe = GroupPartitionsExec(child)
+
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      assert(gpe.tryEnableSortedMerge().isEmpty)
+    }
+  }
+
+  test("SPARK-56549: tryEnableSortedMerge returns None when no coalescing occurs") {
+    val partitionKeys = Seq(row(1), row(2), row(3))
+    val childOrdering = Seq(SortOrder(exprA, Ascending))
+    val child = DummyLeafSparkPlan(
+      outputPartitioning = KeyedPartitioning(Seq(exprA), partitionKeys),
+      outputOrdering = childOrdering)
+    val gpe = GroupPartitionsExec(child)
+
+    assert(gpe.groupedPartitions.forall(_._2.size <= 1), "expected non-coalescing")
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      assert(gpe.tryEnableSortedMerge().isEmpty)
+    }
+  }
 }
 
 private case class DummyLeafSparkPlan(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala
index 9c67a334c801c..2ff4652646cca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.exchange
 
 import org.apache.spark.api.python.PythonEvalType
+import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.DirectShufflePartitionID
@@ -27,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.Inner
 import org.apache.spark.sql.catalyst.plans.physical.{SinglePartition, _}
 import org.apache.spark.sql.catalyst.statsEstimation.StatsTestPlan
 import org.apache.spark.sql.connector.catalog.functions._
-import org.apache.spark.sql.execution.{DummySparkPlan, SortExec}
+import org.apache.spark.sql.execution.{BinaryExecNode, DummySparkPlan, LeafExecNode, SafeForKWayMerge, SortExec, UnaryExecNode}
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, GroupPartitionsExec}
 import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec}
@@ -1037,10 +1038,10 @@ class EnsureRequirementsSuite extends SharedSparkSession {
         case SortMergeJoinExec(_, _, _, _,
             SortExec(_, _,
               GroupPartitionsExec(DummySparkPlan(_, _, left: KeyedPartitioning, _, _),
-                _, _, _, _), _),
+                _, _, _, _, _), _),
             SortExec(_, _,
               GroupPartitionsExec(DummySparkPlan(_, _, right: KeyedPartitioning, _, _),
-                _, _, _, _), _),
+                _, _, _, _, _), _),
             _) =>
           assert(left.expressions === Seq(bucket(4, exprB), bucket(8, exprC)))
           assert(right.expressions === Seq(bucket(4, exprC), bucket(8, exprB)))
@@ -1061,10 +1062,10 @@ class EnsureRequirementsSuite extends SharedSparkSession {
         case SortMergeJoinExec(_, _, _, _,
             SortExec(_, _,
               GroupPartitionsExec(DummySparkPlan(_, _, left: PartitioningCollection, _, _),
-                _, _, _, _), _),
+                _, _, _, _, _), _),
             SortExec(_, _,
               GroupPartitionsExec(DummySparkPlan(_, _, right: KeyedPartitioning, _, _),
-                _, _, _, _), _),
+                _, _, _, _, _), _),
             _) =>
           assert(left.partitionings.length == 2)
           assert(left.partitionings.head.isInstanceOf[KeyedPartitioning])
@@ -1096,10 +1097,10 @@ class EnsureRequirementsSuite extends SharedSparkSession {
         case SortMergeJoinExec(_, _, _, _,
             SortExec(_, _,
               GroupPartitionsExec(DummySparkPlan(_, _, left: PartitioningCollection, _, _),
-                _, _, _, _), _),
+                _, _, _, _, _), _),
             SortExec(_, _,
               GroupPartitionsExec(DummySparkPlan(_, _, right: PartitioningCollection, _, _),
-                _, _, _, _), _),
+                _, _, _, _, _), _),
             _) =>
           assert(left.partitionings.length == 2)
           assert(left.partitionings.head.isInstanceOf[KeyedPartitioning])
@@ -1397,4 +1398,159 @@ class EnsureRequirementsSuite extends SharedSparkSession {
       requiredChildDistribution = Seq(UnspecifiedDistribution),
       requiredChildOrdering = Seq(Seq.empty)
     )
+
+  test("SPARK-56549: tryEnableSortedMerge traversal continues through plain unary nodes") {
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      val exprKey = AttributeReference("k", IntegerType)()
+      val partitionKeys = Seq(InternalRow(1), InternalRow(2), InternalRow(1))
+      val ordering = Seq(SortOrder(exprKey, Ascending))
+      val leaf = DummyLeafSafeForKWayMerge(
+        outputPartitioning = KeyedPartitioning(Seq(exprKey), partitionKeys),
+        outputOrdering = ordering)
+      val gpe = GroupPartitionsExec(leaf)
+
+      // Baseline: GPE at root -- at least one alternative has sorted merge enabled.
+      assert(EnsureRequirements.tryEnableSortedMerge(gpe).exists(anyGpeEnabled))
+      // Plain unary wrapper (e.g. FilterExec): traversal continues and sorted merge is enabled.
+      assert(EnsureRequirements.tryEnableSortedMerge(DummyPassthroughUnaryExec(gpe))
+        .exists(anyGpeEnabled))
+      // Two levels of plain unary wrappers: still enabled.
+      assert(EnsureRequirements.tryEnableSortedMerge(
+        DummyPassthroughUnaryExec(DummyPassthroughUnaryExec(gpe)))
+        .exists(anyGpeEnabled))
+    }
+  }
+
+  test("SPARK-56549: tryEnableSortedMerge traversal continues through binary nodes that " +
+    "propagate ordering from one child (e.g. ShuffledHashJoinExec stream side)") {
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      val exprKey = AttributeReference("k", IntegerType)()
+      val partitionKeys = Seq(InternalRow(1), InternalRow(2), InternalRow(1))
+      val ordering = Seq(SortOrder(exprKey, Ascending))
+      val leaf = DummyLeafSafeForKWayMerge(
+        outputPartitioning = KeyedPartitioning(Seq(exprKey), partitionKeys),
+        outputOrdering = ordering)
+      val gpe = GroupPartitionsExec(leaf)
+      val otherChild = DummyLeafSafeForKWayMerge()
+
+      // Binary node whose ordering comes from left child (GPE side): sorted merge enabled.
+      assert(EnsureRequirements.tryEnableSortedMerge(DummyOrderFromLeftBinaryExec(gpe, otherChild))
+        .exists(anyGpeEnabled))
+      // Binary node with GPE only on the non-ordering (right) side: the binary node's
+      // outputPartitioning = left.outputPartitioning carries no KeyedPartitioning, so the pruning
+      // condition stops traversal at the root; no GPE is enabled.
+      assert(!EnsureRequirements.tryEnableSortedMerge(DummyOrderFromLeftBinaryExec(otherChild, gpe))
+        .exists(anyGpeEnabled))
+    }
+  }
+
+  test("SPARK-56549: tryEnableSortedMerge traversal through binary nodes with " +
+    "PartitioningCollection (KP from both children, e.g. SHJ InnerLike)") {
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      val exprKey = AttributeReference("k", IntegerType)()
+      val partitionKeys = Seq(InternalRow(1), InternalRow(2), InternalRow(1))
+      val ordering = Seq(SortOrder(exprKey, Ascending))
+      val leaf = DummyLeafSafeForKWayMerge(
+        outputPartitioning = KeyedPartitioning(Seq(exprKey), partitionKeys),
+        outputOrdering = ordering)
+      val gpe = GroupPartitionsExec(leaf)
+      val otherChild = DummyLeafSafeForKWayMerge(
+        outputPartitioning = UnknownPartitioning(gpe.outputPartitioning.numPartitions))
+
+      // GPE on ordering (left) side: sorted merge is enabled and the binary's outputOrdering
+      // becomes non-empty.
+      assert(EnsureRequirements.tryEnableSortedMerge(DummyBothKPBinaryExec(gpe, otherChild))
+        .exists(p => anyGpeEnabled(p) && p.outputOrdering.nonEmpty))
+
+      // GPE on non-ordering (right) side: the PartitioningCollection on the binary node includes
+      // KP from the right child, so traversal enters the binary and sorted merge IS enabled on the
+      // GPE. However, the binary's outputOrdering remains empty: it comes from the left (non-GPE)
+      // child. The call site's find correctly rejects all such alternatives.
+      assert(EnsureRequirements.tryEnableSortedMerge(DummyBothKPBinaryExec(otherChild, gpe))
+        .exists(anyGpeEnabled))
+      assert(!EnsureRequirements.tryEnableSortedMerge(DummyBothKPBinaryExec(otherChild, gpe))
+        .exists(_.outputOrdering.nonEmpty))
+    }
+  }
+
+  test("SPARK-56549: tryEnableSortedMerge traversal stops at SortExec and Exchange") {
+    withSQLConf(SQLConf.V2_BUCKETING_PRESERVE_ORDERING_ON_COALESCE_ENABLED.key -> "true") {
+      val exprKey = AttributeReference("k", IntegerType)()
+      val partitionKeys = Seq(InternalRow(1), InternalRow(2), InternalRow(1))
+      val ordering = Seq(SortOrder(exprKey, Ascending))
+      val leaf = DummyLeafSafeForKWayMerge(
+        outputPartitioning = KeyedPartitioning(Seq(exprKey), partitionKeys),
+        outputOrdering = ordering)
+      val gpe = GroupPartitionsExec(leaf)
+
+      // SortExec: the pruning condition (!isInstanceOf[SortExec]) stops traversal, so the GPE
+      // inside is not enabled in any alternative.
+      assert(!EnsureRequirements.tryEnableSortedMerge(
+        SortExec(ordering, global = false, child = gpe)).exists(anyGpeEnabled))
+      // Exchange produces non-KeyedPartitioning output so the hasKeyedPartitioning half of the
+      // pruning condition stops traversal; GPE inside is not enabled.
+      assert(!EnsureRequirements.tryEnableSortedMerge(DummyExchangeExec(gpe)).exists(anyGpeEnabled))
+      // Plain unary wrapper above a SortExec: traversal reaches the wrapper but stops at the
+      // SortExec; GPE inside is still not enabled.
+      assert(!EnsureRequirements.tryEnableSortedMerge(
+        DummyPassthroughUnaryExec(SortExec(ordering, global = false, child = gpe)))
+        .exists(anyGpeEnabled))
+    }
+  }
+
+  private def anyGpeEnabled(plan: SparkPlan): Boolean =
+    plan.collectFirst { case gpe: GroupPartitionsExec if gpe.enableSortedMerge => true }.isDefined
+}
+
+private case class DummyLeafSafeForKWayMerge(
+    override val outputOrdering: Seq[SortOrder] = Nil,
+    override val outputPartitioning: Partitioning = UnknownPartitioning(0)
+  ) extends LeafExecNode with SafeForKWayMerge {
+  override protected def doExecute(): RDD[InternalRow] = null
+  override def output: Seq[Attribute] = Seq.empty
+}
+
+private case class DummyPassthroughUnaryExec(child: SparkPlan) extends UnaryExecNode {
+  override def output: Seq[Attribute] = child.output
+  override def outputOrdering: Seq[SortOrder] = child.outputOrdering
+  override def outputPartitioning: Partitioning = child.outputPartitioning
+  override protected def doExecute(): RDD[InternalRow] = null
+  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
+    copy(child = newChild)
+}
+
+// Models a binary join whose output ordering comes from the left child (e.g. SHJ stream=left).
+private case class DummyOrderFromLeftBinaryExec(left: SparkPlan, right: SparkPlan)
+    extends BinaryExecNode {
+  override def output: Seq[Attribute] = left.output ++ right.output
+  override def outputOrdering: Seq[SortOrder] = left.outputOrdering
+  override def outputPartitioning: Partitioning = left.outputPartitioning
+  override protected def doExecute(): RDD[InternalRow] = null
+  override protected def withNewChildrenInternal(
+      newLeft: SparkPlan, newRight: SparkPlan): SparkPlan =
+    copy(left = newLeft, right = newRight)
+}
+
+// Models a binary join whose outputPartitioning is a PartitioningCollection containing both
+// children's partitionings (e.g. SHJ InnerLike), while outputOrdering still comes from the left
+// child only.
+private case class DummyBothKPBinaryExec(left: SparkPlan, right: SparkPlan)
+    extends BinaryExecNode {
+  override def output: Seq[Attribute] = left.output ++ right.output
+  override def outputOrdering: Seq[SortOrder] = left.outputOrdering
+  override def outputPartitioning: Partitioning =
+    PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning))
+  override protected def doExecute(): RDD[InternalRow] = null
+  override protected def withNewChildrenInternal(
+      newLeft: SparkPlan, newRight: SparkPlan): SparkPlan =
+    copy(left = newLeft, right = newRight)
+}
+
+// Exchange produces non-KeyedPartitioning output (UnknownPartitioning by default);
+// do not override outputPartitioning or outputOrdering here.
+private case class DummyExchangeExec(child: SparkPlan) extends Exchange {
+  override def output: Seq[Attribute] = child.output
+  override protected def doExecute(): RDD[InternalRow] = null
+  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
+    copy(child = newChild)
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/MetricsFailureInjectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/MetricsFailureInjectionSuite.scala
new file mode 100644
index 0000000000000..847a12f4f305c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/MetricsFailureInjectionSuite.scala
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.metric
+
+import scala.util.Random
+
+import org.apache.spark.internal.config
+import org.apache.spark.sql.{Column, Dataset}
+import org.apache.spark.sql.execution.adaptive.{AQETestHelper, DisableAdaptiveExecutionSuite}
+import org.apache.spark.sql.functions.udf
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+class MetricsFailureInjectionSuite
+  extends SharedSparkSession
+  with SQLMetricsTestUtils
+  // Need to control AQE per-test to ensure expected plan shapes.
+  with DisableAdaptiveExecutionSuite {
+
+  import testImplicits._
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    // Disable re-use, since it interferes with the forced replanning.
+    spark.conf.set(SQLConf.EXCHANGE_REUSE_ENABLED, false)
+  }
+
+  def setUpTestTable(tableName: String): Unit = {
+    val rand = new Random(1)
+    val randomPrefix = rand.nextString(30)
+    spark.range(300).map { id =>
+        (id, (id % 5).toInt, randomPrefix + (id % 111))
+      }.toDF("id", "low_cardinality_col", "large_col")
+      .write.format("parquet").saveAsTable(tableName)
+    val numRecords = spark.read.table(tableName).count()
+    assert(numRecords === 300)
+  }
+
+  for {
+    useAQE <- BOOLEAN_DOMAIN
+  } test(s"Two stage metrics AQE cancellation injection - useAQE=$useAQE") {
+    withSQLConf(
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> useAQE.toString) {
+      val stage1Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 1 counter")
+      val stage2Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 2 counter")
+      val stage1SLAMetric =
+        SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 1 SLAM")
+      val stage2SLAMetric =
+        SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 2 SLAM")
+
+      def runQueryWithMetrics(
+          triggerMetrics: SQLMetric*)(
+          postRunChecks: Dataset[_] => Unit): Unit = {
+        assert(stage1Metric.value === 0)
+        assert(stage2Metric.value === 0)
+        withTable("test_table") {
+          setUpTestTable("test_table")
+          AQETestHelper.withForcedCancellation(triggerMetrics: _*) {
+            val stage1MetricsExpr = incrementMetrics(Seq(stage1Metric, stage1SLAMetric))
+            val stage1 = spark.read.table("test_table").filter(Column(stage1MetricsExpr))
+            val stage2MetricsExpr = incrementMetrics(Seq(stage2Metric, stage2SLAMetric))
+            val stage2 =
+              stage1.groupBy("low_cardinality_col").count().filter(Column(stage2MetricsExpr))
+            val finalDf = stage2.as[(Int, Long)]
+            val result = finalDf.collect()
+
+            assert(result.toMap === (0 until 5).map(v => (v, 300 / 5)).toMap)
+            postRunChecks(finalDf)
+            stage1Metric.reset()
+            stage2Metric.reset()
+          }
+        }
+      }
+
+      // SLAM values don't change with retries, so we can reuse the same assertions for all cases.
+      def assertSLAM(finalDf: Dataset[_]): Unit = {
+        assert(stage1SLAMetric.lastAttemptValueForHighestRDDId() === Some(300))
+        assert(stage2SLAMetric.lastAttemptValueForHighestRDDId() === Some(5))
+
+        assert(stage1SLAMetric.lastAttemptValueForDataset(finalDf) === Some(300))
+        assert(stage2SLAMetric.lastAttemptValueForDataset(finalDf) === Some(5))
+      }
+
+      // Case 1: No forced replanning.
+      runQueryWithMetrics() { finalDf =>
+        assert(stage1Metric.value === 300)
+        assert(stage2Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+
+      // Case 2: Replan on stage1Metric.
+      runQueryWithMetrics(stage1Metric) { finalDf =>
+        if (useAQE) {
+          assert(stage1Metric.value > 300)
+        } else {
+          assert(stage1Metric.value === 300)
+        }
+        assert(stage2Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+
+      // Case 3: Replan on stage2Metric (will be ignored, because this is a result stage).
+      runQueryWithMetrics(stage2Metric) { finalDf =>
+        assert(stage1Metric.value === 300)
+        assert(stage2Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+
+      // Case 4: Replan on both metrics (only first will actually trigger).
+      runQueryWithMetrics(stage1Metric, stage2Metric) { finalDf =>
+        if (useAQE) {
+          assert(stage1Metric.value > 300)
+        } else {
+          assert(stage1Metric.value === 300)
+        }
+        assert(stage2Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+    }
+  }
+
+  for {
+    useAQE <- BOOLEAN_DOMAIN
+  } test(s"Three stage metrics AQE cancellation injection - useAQE=$useAQE") {
+    withSQLConf(
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> useAQE.toString) {
+      val stage1Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 1 counter")
+      val stage2Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 2 counter")
+      val stage3Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 3 counter")
+      val stage1SLAMetric =
+        SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 1 SLAM")
+      val stage2SLAMetric =
+        SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 2 SLAM")
+      val stage3SLAMetric =
+        SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 3 SLAM")
+
+      def runQueryWithMetrics(
+          triggerMetrics: SQLMetric*)(postRunChecks: Dataset[_] => Unit): Unit = {
+        assert(stage1Metric.value === 0)
+        assert(stage2Metric.value === 0)
+        withTable("primary_table", "secondary_table") {
+          // Use the same layout for both. Makes the query a non-obvious self-join essentially.
+          setUpTestTable("primary_table")
+          setUpTestTable("secondary_table")
+          AQETestHelper.withForcedCancellation(triggerMetrics: _*) {
+            val stage1MetricsExpr = incrementMetrics(Seq(stage1Metric, stage1SLAMetric))
+            val stage1 = spark.read.table("primary_table")
+              .filter(Column(stage1MetricsExpr))
+            val stage2MetricsExpr = incrementMetrics(Seq(stage2Metric, stage2SLAMetric))
+            val stage2 = stage1.join(
+                spark.read.table("secondary_table"),
+                usingColumn = "id",
+                joinType = "fullOuter")
+              .filter(Column(stage2MetricsExpr))
+            val stage3MetricsExpr = incrementMetrics(Seq(stage3Metric, stage3SLAMetric))
+            val stage3 = stage2
+              .groupBy("primary_table.low_cardinality_col")
+              .count()
+              .filter(Column(stage3MetricsExpr))
+            val finalDf = stage3.as[(Int, Long)]
+            val result = finalDf.collect()
+            assert(result.toMap === (0 until 5).map(v => (v, 300 / 5)).toMap)
+            postRunChecks(finalDf)
+            stage1Metric.reset()
+            stage2Metric.reset()
+            stage3Metric.reset()
+          }
+        }
+      }
+
+      // SLAM values don't change with retries, so we can reuse the same assertions for all cases.
+      def assertSLAM(finalDf: Dataset[_]): Unit = {
+        assert(stage1SLAMetric.lastAttemptValueForHighestRDDId() === Some(300))
+        assert(stage2SLAMetric.lastAttemptValueForHighestRDDId() === Some(300))
+        assert(stage3SLAMetric.lastAttemptValueForHighestRDDId() === Some(5))
+
+        assert(stage1SLAMetric.lastAttemptValueForDataset(finalDf) === Some(300))
+        assert(stage2SLAMetric.lastAttemptValueForDataset(finalDf) === Some(300))
+        assert(stage3SLAMetric.lastAttemptValueForDataset(finalDf) === Some(5))
+      }
+
+      // Case 1: No forced replanning.
+      runQueryWithMetrics() { finalDf =>
+        assert(stage1Metric.value === 300)
+        assert(stage2Metric.value === 300)
+        assert(stage3Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+
+      // Case 2: Replan on stage1Metric.
+      runQueryWithMetrics(stage1Metric) { finalDf =>
+        if (useAQE) {
+          assert(stage1Metric.value > 300)
+        } else {
+          assert(stage1Metric.value === 300)
+        }
+        assert(stage2Metric.value === 300)
+        assert(stage3Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+
+      // Case 3: Replan on stage2Metric (will also re-run the first stage).
+      runQueryWithMetrics(stage2Metric) { finalDf =>
+        if (useAQE) {
+          assert(stage1Metric.value > 300)
+          assert(stage2Metric.value > 300)
+        } else {
+          assert(stage1Metric.value === 300)
+          assert(stage2Metric.value === 300)
+        }
+        assert(stage3Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+
+      // Case 4: Replan on all metrics (only first will actually trigger).
+      runQueryWithMetrics(stage1Metric, stage2Metric, stage3Metric) { finalDf =>
+        if (useAQE) {
+          assert(stage1Metric.value > 300)
+        } else {
+          assert(stage1Metric.value === 300)
+        }
+        assert(stage2Metric.value === 300)
+        assert(stage3Metric.value === 5)
+
+        assertSLAM(finalDf)
+      }
+    }
+  }
+
+  for {
+    injectFailure <- BOOLEAN_DOMAIN
+  } test(s"Two stage metrics block failure injection - injectFailure=$injectFailure") {
+    val stage1Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 1 counter")
+    val stage2Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 2 counter")
+    val stage1SLAMetric =
+      SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 1 SLAM")
+    val stage2SLAMetric =
+      SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 2 SLAM")
+
+    def runQueryWithMetrics(
+        triggerMetrics: SQLMetric*)(postRunChecks: Dataset[_] => Unit): Unit = {
+      assert(stage1Metric.value === 0)
+      assert(stage2Metric.value === 0)
+      withTable("test_table") {
+        setUpTestTable("test_table")
+        withSparkContextConf(
+            config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> injectFailure.toString) {
+          val stage1MetricsExpr = incrementMetrics(Seq(stage1Metric, stage1SLAMetric))
+          val stage1 = spark.read.table("test_table").filter(Column(stage1MetricsExpr))
+          val stage2MetricsExpr = incrementMetrics(Seq(stage2Metric, stage2SLAMetric))
+          val stage2 =
+            stage1.groupBy("low_cardinality_col").count().filter(Column(stage2MetricsExpr))
+          val finalDf = stage2.as[(Int, Long)]
+          val result = finalDf.collect()
+          assert(result.toMap === (0 until 5).map(v => (v, 300 / 5)).toMap)
+          postRunChecks(finalDf)
+          stage1Metric.reset()
+          stage2Metric.reset()
+        }
+      }
+    }
+
+    runQueryWithMetrics() { finalDf =>
+      if (injectFailure) {
+        assert(stage1Metric.value > 300)
+      } else {
+        assert(stage1Metric.value === 300)
+      }
+      // Stage2 doesn't have a downstream shuffle stage we can fail.
+      assert(stage2Metric.value === 5)
+
+      assert(stage1SLAMetric.lastAttemptValueForHighestRDDId() === Some(300))
+      assert(stage2SLAMetric.lastAttemptValueForHighestRDDId() === Some(5))
+
+      assert(stage1SLAMetric.lastAttemptValueForDataset(finalDf) === Some(300))
+      assert(stage2SLAMetric.lastAttemptValueForDataset(finalDf) === Some(5))
+    }
+  }
+
+  for {
+    injectFailure <- BOOLEAN_DOMAIN
+  } test(s"Non-deterministic stage block failure injection - injectFailure=$injectFailure") {
+    val stage1Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 1 counter")
+    val stage2Metric = SQLMetrics.createMetric(spark.sparkContext, "stage 2 counter")
+    val stage1SLAMetric =
+      SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 1 SLAM")
+    val stage2SLAMetric =
+      SQLLastAttemptMetrics.createMetric(spark.sparkContext, "stage 2 SLAM")
+
+    def runQueryWithMetrics(
+        triggerMetrics: SQLMetric*)(postRunChecks: Dataset[_] => Unit): Unit = {
+      assert(stage1Metric.value === 0)
+      assert(stage2Metric.value === 0)
+      withTable("test_table") {
+        setUpTestTable("test_table")
+        withSparkContextConf(
+            config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> injectFailure.toString) {
+          val stage1MetricsExpr = incrementMetrics(Seq(stage1Metric, stage1SLAMetric))
+          val udfRand =
+            udf {
+              () => {
+                new Random().nextDouble()
+              }
+            }.asNondeterministic().apply().expr
+          val stage1 = spark.read.table("test_table")
+            .withColumn("non_deterministic_col", Column(udfRand))
+            .filter(Column(stage1MetricsExpr))
+          val stage2MetricsExpr = incrementMetrics(Seq(stage2Metric, stage2SLAMetric))
+          val stage2 = stage1
+            .groupBy("low_cardinality_col")
+            .avg("non_deterministic_col")
+            .filter(Column(stage2MetricsExpr))
+          // Add an extra stage with a single task to avoid flaky failures. If a ResultTask
+          // returns non-deterministic results to the client, it forces the query to abort
+          // instead of retrying the input stages.
+          val finalDf = stage2.repartition(1).as[(Int, Double)]
+          val result = finalDf.collect()
+          // Don't compare the second value, since it's random.
+          assert(result.map(_._1).toSet === (0 until 5).toSet)
+          postRunChecks(finalDf)
+          stage1Metric.reset()
+          stage2Metric.reset()
+        }
+      }
+    }
+
+    runQueryWithMetrics() { finalDf =>
+      if (injectFailure) {
+        assert(stage1Metric.value > 300)
+      } else {
+        assert(stage1Metric.value === 300)
+      }
+      // Stage2 doesn't have a downstream shuffle stage we can fail.
+      assert(stage2Metric.value === 5)
+
+      assert(stage1SLAMetric.lastAttemptValueForHighestRDDId() === Some(300))
+      assert(stage2SLAMetric.lastAttemptValueForHighestRDDId() === Some(5))
+
+      assert(stage1SLAMetric.lastAttemptValueForDataset(finalDf) === Some(300))
+      assert(stage2SLAMetric.lastAttemptValueForDataset(finalDf) === Some(5))
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricIntegrationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricIntegrationSuite.scala
new file mode 100644
index 0000000000000..2e7af075a3e74
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricIntegrationSuite.scala
@@ -0,0 +1,705 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.metric
+
+import org.apache.spark.internal.config
+import org.apache.spark.sql.Column
+import org.apache.spark.sql.catalyst.expressions.Literal
+import org.apache.spark.sql.catalyst.optimizer.BuildRight
+import org.apache.spark.sql.catalyst.plans.RightOuter
+import org.apache.spark.sql.execution.{CoalescedPartitionSpec, CoalescedShuffleRead, WholeStageCodegenExec}
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/** Tests [[SQLLastAttemptMetric]] used by [[RDD]]s and [[Dataset]]s */
+class SQLLastAttemptMetricIntegrationSuite
+  extends SharedSparkSession
+  with SQLMetricsTestUtils {
+  import testImplicits._
+
+  protected def withRetries = false
+
+  test("single stage rdd updates with shared slam") {
+    val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+    val rdd1 = spark.sparkContext.parallelize(1 to 10, 2).map { x =>
+      slam.add(1)
+      x
+    }
+
+    rdd1.count()
+    assert(withRetries || slam.value === 10)
+    assert(slam.lastAttemptValueForAllRDDs() === Some(10))
+    assert(slam.lastAttemptValueForRDDId(rdd1.id) === Some(10))
+    assert(slam.lastAttemptValueForRDDIds(Seq(rdd1.id, rdd1.id)) === Some(10))
+    assert(slam.lastAttemptValueForRDDIds(Seq(rdd1.id + 1, rdd1.id + 2)) === Some(0))
+    assert(slam.lastAttemptValueForRDDIds(Seq(rdd1.id, rdd1.id + 10, rdd1.id)) === Some(10))
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(10))
+
+    val rdd2 = spark.sparkContext.parallelize(1 to 50, 3).map { x =>
+      slam.add(3)
+      x
+    }
+    rdd2.count()
+    assert(withRetries || slam.value === 160) // +150
+    assert(slam.lastAttemptValueForRDDId(rdd1.id) === Some(10)) // value for first rdd unaffected
+    assert(slam.lastAttemptValueForRDDId(rdd2.id) === Some(150)) // value for second rdd recorded
+    assert(slam.lastAttemptValueForAllRDDs() === Some(160)) // value for all rdds summed
+    assert(slam.getNumRDDs === 2)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(150)) // highest RDD id updated.
+
+    // Re-executing rdd1
+    rdd1.count()
+    assert(withRetries || slam.value === 170) // +10
+    // Re-execution doesn't produce duplicate last attempt values
+    assert(slam.lastAttemptValueForRDDId(rdd1.id) === Some(10))
+    assert(slam.lastAttemptValueForAllRDDs() === Some(160))
+    assert(slam.getNumRDDs === 2)
+    // Highest RDD id tracks highest rdd.id, not the last RDD to be executed
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(150))
+
+    // New RDD on top of rdd1, but in a single stage.
+    val rdd3 = rdd1.map { x =>
+      slam.add(2)
+      x
+    }
+    rdd3.count()
+    assert(withRetries || slam.value === 200) // +30
+    assert(slam.lastAttemptValueForRDDId(rdd1.id) === Some(10)) // stays the same
+    // The increment from rdd1 and rdd3 are in the same stage, so they are recorded together.
+    assert(slam.getNumRDDs === 3)
+    assert(slam.lastAttemptValueForRDDId(rdd3.id) === Some(30))
+    assert(slam.getHighestRDDId === Some(rdd3.id))
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(30))
+
+    // Setting a value directly from the driver makes slam bail out, because it can't reason
+    // about what is the "last attempt" on driver vs. coming from RDD executions.
+    slam.set(42)
+    assert(!slam.getValid)
+    // Information stays available for logging and debugging.
+    assert(slam.getNumRDDs === 3)
+    assert(slam.getHighestRDDId === Some(rdd3.id))
+
+    logInfo(slam.logAccumulatorState)
+  }
+
+  test("multi stage rdd updates") {
+    val slam1 = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM1")
+    val slam2 = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM2")
+
+    val rdd1 = spark.sparkContext.parallelize(1 to 10, 2).map { x =>
+      slam1.add(1)
+      x
+    }
+    val repartition = rdd1.repartition(10)
+    val rdd2 = repartition.map { x =>
+      slam2.add(1)
+      x
+    }
+    rdd2.collect()
+    assert(withRetries || slam1.value === 10)
+    assert(withRetries || slam2.value === 10)
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(10))
+    assert(slam2.lastAttemptValueForAllRDDs() === Some(10))
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10))
+    assert(slam2.lastAttemptValueForHighestRDDId() === Some(10))
+    // It is executed in a Stage submitted by the repartition.
+    assert(slam1.lastAttemptValueForRDDId(rdd1.id) === Some(0))
+    assert(slam1.lastAttemptValueForRDDId(repartition.id) === Some(0)) // Surprise, nope.
+    // Repartition creates a number of MapPartitionsRDDs, CoalescedRDDs, ShuffledRDDs...
+    // The actual stage that submits the map stage is somewhere internal.
+    assert(slam1.getHighestRDDId.isDefined)
+    val mapStageRddId = slam1.getHighestRDDId.get
+    assert(slam1.lastAttemptValueForRDDId(mapStageRddId) === Some(10))
+
+    // Test passing multiple ids.
+    assert(slam1.lastAttemptValueForRDDIds(Seq(rdd1.id, repartition.id)) === Some(0))
+    assert(slam1.lastAttemptValueForRDDIds(
+      Seq(rdd1.id, mapStageRddId, repartition.id)) === Some(10))
+    assert(slam1.lastAttemptValueForRDDIds(Seq(rdd1.id, rdd2.id)) === Some(0))
+    assert(slam1.lastAttemptValueForRDDIds(Seq(-10)) === Some(0))
+
+    rdd2.collect()
+    // Repartition stage is reused, but result stage is re-executed.
+    assert(withRetries || slam1.value === 10) // no change
+    assert(withRetries || slam2.value === 20) // +10
+    // Last attempt value is not duplicated, since result stage is an action on the same RDD.
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(10))
+    assert(slam2.lastAttemptValueForAllRDDs() === Some(10))
+
+    rdd1.collect()
+    assert(withRetries || slam1.value === 20) // +10
+    // The first time around it was executed in the repartition RDD stage.
+    // This time around it is executed from action of rdd1.
+    assert(slam1.getNumRDDs === 2)
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(20))
+    assert(slam1.lastAttemptValueForRDDId(rdd1.id) === Some(10)) // new
+    assert(slam1.lastAttemptValueForRDDId(mapStageRddId) === Some(10)) // old
+    // Highest RDD id stays the same.
+    assert(slam1.getHighestRDDId === Some(mapStageRddId))
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10))
+
+    rdd1.collect()
+    assert(withRetries || slam1.value === 30) // +10
+    // Still the same
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(20))
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10))
+
+    rdd2.collect()
+    // Repartition stage is reused (again), but result stage is re-executed (again).
+    assert(withRetries || slam1.value === 30) // no change
+    assert(withRetries || slam2.value === 30) // +10
+    // Last attempt value is not duplicated, since result stage is an action on the same RDD.
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(20))
+    assert(slam2.lastAttemptValueForAllRDDs() === Some(10))
+
+    // Executed in different RDDs, but never duplicated.
+    assert(slam1.lastAttemptValueForRDDId(rdd1.id) === Some(10))
+    assert(slam1.lastAttemptValueForRDDId(mapStageRddId) === Some(10))
+    assert(slam1.lastAttemptValueForRDDId(repartition.id) === Some(0))
+    assert(slam1.lastAttemptValueForRDDId(rdd2.id) === Some(0))
+    assert(slam2.lastAttemptValueForRDDId(rdd1.id) === Some(0))
+    assert(slam2.lastAttemptValueForRDDId(mapStageRddId) === Some(0))
+    assert(slam1.lastAttemptValueForRDDId(repartition.id) === Some(0))
+    assert(slam2.lastAttemptValueForRDDId(rdd2.id) === Some(10))
+
+    val newRepartition = rdd1.repartition(10)
+    val newRdd2 = newRepartition.map { x =>
+      slam2.add(1)
+      x
+    }
+    newRdd2.collect()
+    assert(withRetries || slam1.value === 40) // +10
+    assert(withRetries || slam2.value === 40) // +10
+    // SLAM metrics get re-executed in the new RDDs
+    // rdd1 is reused, but the shuffle is new, and that is what submits the map stage.
+    assert(slam1.getNumRDDs === 3)
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(30)) // +10
+    assert(slam2.getNumRDDs === 2)
+    assert(slam2.lastAttemptValueForAllRDDs() === Some(20)) // +10
+    // Values are recorded for the new highest RDD id.
+    assert(slam1.getHighestRDDId.isDefined)
+    val newMapStageId = slam1.getHighestRDDId.get
+    assert(newMapStageId > mapStageRddId)
+    assert(slam2.getHighestRDDId === Some(newRdd2.id))
+    assert(slam1.lastAttemptValueForRDDId(newMapStageId) === Some(10))
+    assert(slam2.lastAttemptValueForRDDId(newRdd2.id) === Some(10))
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10))
+    assert(slam2.lastAttemptValueForHighestRDDId() === Some(10))
+
+    logInfo(slam1.logAccumulatorState)
+    logInfo(slam2.logAccumulatorState)
+  }
+
+  test("rdd take") {
+    val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+    val rdd = spark.sparkContext.parallelize(1 to 100, 100).map { x =>
+      slam.add(1)
+      x
+    }
+    withSparkContextConf(
+      // make it fixed to not be affected by potential changes of default.
+      config.RDD_LIMIT_INITIAL_NUM_PARTITIONS.key -> "1",
+      config.RDD_LIMIT_SCALE_UP_FACTOR.key -> "4"
+    ) {
+      rdd.take(1) // execute 1 partition
+      assert(withRetries || slam.value === 1)
+      assert(slam.lastAttemptValueForAllRDDs() === Some(1))
+
+      // take(2) scales up from 1 partition; the exact number of partitions scanned
+      // depends on the scale-up algorithm.
+      val valueBefore = slam.value
+      rdd.take(2)
+      val slamAfterTake2 = slam.lastAttemptValueForAllRDDs()
+      assert(slamAfterTake2.isDefined)
+      assert(slamAfterTake2.get >= 2) // at least 2 partitions
+      assert(slamAfterTake2.get < 100) // but not all partitions.
+
+      // take(100) should execute all 100 partitions
+      rdd.take(100)
+      assert(slam.lastAttemptValueForAllRDDs() === Some(100))
+      assert(slam.getNumRDDs === 1)
+    }
+
+    logInfo(slam.logAccumulatorState)
+  }
+
+  test("rdd coalesce") {
+    val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+    val rdd1 = spark.sparkContext.parallelize(1 to 100, 100).map { x =>
+      slam.add(1)
+      x
+    }
+    val rdd2 = rdd1.coalesce(20)
+    // Test that coalescing that changes partition count doesn't break anything.
+    rdd2.collect()
+    assert(slam.lastAttemptValueForRDDId(rdd2.id) === Some(100))
+    rdd1.collect()
+    assert(slam.lastAttemptValueForRDDId(rdd1.id) === Some(100))
+
+    logInfo(slam.logAccumulatorState)
+  }
+
+  test("dataset updates") {
+    val slam1 = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM1")
+    val df1 = spark.range(10).filter(Column(incrementMetric(slam1)))
+
+    df1.collect()
+    assert(withRetries || slam1.value === 10)
+    assert(slam1.getHighestRDDId.isDefined)
+    val df1HighestId = slam1.getHighestRDDId.get
+    val df1ExecutedPlanRddId = df1.queryExecution.executedPlan.execute().id
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10))
+    assert(slam1.lastAttemptValueForRDDId(df1HighestId) === Some(10))
+    // Values retrieved from the Dataset are the same as from the RDD.
+    assert(slam1.lastAttemptValueForDataset(df1) === Some(10))
+    assert(slam1.lastAttemptValueForQueryExecution(df1.queryExecution) === Some(10))
+
+    df1.collect()
+    assert(withRetries || slam1.value === 20) // +10
+    // The same executedPlan RDD is reused, but getByteArrayRdd creates a new wrapper.
+    assert(df1.queryExecution.executedPlan.execute().id === df1ExecutedPlanRddId)
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10))
+    // Both wrapper RDDs are summed in allRDDs.
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(20))
+    assert(slam1.lastAttemptValueForDataset(df1) === Some(10))
+
+    val df2 = df1.filter("id < 5").filter(Column(incrementMetric(slam1)))
+    df2.collect()
+    assert(withRetries || slam1.value === 35) // +15
+    assert(slam1.getHighestRDDId.isDefined)
+    // Both incrementMetric expressions are within the same Stage, so they record together.
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(15))
+    // allRDDs includes wrapper RDDs from repeated df1.collect() calls.
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(35))
+    // New Dataset records only new value.
+    assert(slam1.lastAttemptValueForDataset(df2) === Some(15))
+    // Value df1 is still remembered.
+    assert(slam1.lastAttemptValueForDataset(df1) === Some(10))
+
+    val slam2 = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM2")
+    val df3 = df1.repartition(1).filter(Column(incrementMetric(slam2)))
+    df3.collect()
+    assert(withRetries || slam1.value === 45) // +10
+    assert(withRetries || slam2.value === 10) // new
+    // df3 creates a new plan, and the plan / RDD from df1 is not reused.
+    assert(slam1.getHighestRDDId.isDefined)
+    val slam1HighestId = slam1.getHighestRDDId.get
+    assert(slam1HighestId != df1HighestId) // new plan, new RDD id
+    assert(slam1.lastAttemptValueForHighestRDDId() === Some(10)) // from new execution
+    assert(slam1.lastAttemptValueForRDDId(df1HighestId) === Some(10)) // from first exec of df1
+    // allRDDs includes wrapper RDDs from repeated collects.
+    assert(slam1.lastAttemptValueForAllRDDs() === Some(45))
+    assert(slam2.lastAttemptValueForAllRDDs() === Some(10))
+    // slam1 and slam2 are both executed in df3
+    assert(slam1.lastAttemptValueForDataset(df3) === Some(10))
+    assert(slam2.lastAttemptValueForDataset(df3) === Some(10))
+    // slam2 is not executed in df1 and df2.
+    assert(slam2.lastAttemptValueForDataset(df1) === Some(0))
+    assert(slam2.lastAttemptValueForDataset(df2) === Some(0))
+    // slam1 value from df1 and df2 are still remembered.
+    assert(slam1.lastAttemptValueForDataset(df1) === Some(10))
+    assert(slam1.lastAttemptValueForDataset(df2) === Some(15))
+
+    // Plans and RDDs get reused (result stage is re-executed; shuffle stage is purely reused).
+    df3.collect()
+    // No change in dataset values.
+    assert(slam1.lastAttemptValueForDataset(df3) === Some(10))
+    assert(slam2.lastAttemptValueForDataset(df3) === Some(10))
+    assert(slam1.lastAttemptValueForDataset(df1) === Some(10))
+    assert(slam1.lastAttemptValueForDataset(df2) === Some(15))
+
+    logInfo(slam1.logAccumulatorState)
+    logInfo(slam2.logAccumulatorState)
+  }
+
+  test("dataset limit") {
+    val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM1")
+    // 10 partitions of 10 elements each
+    val df = spark.range(0, 1000, 1, 10).filter(Column(incrementMetric(slam)))
+    var expectedMetricValue = 0
+    var expectedSLAMValue = 0
+
+    // Note: this is sensitive to the internal implementation of LimitExec.
+
+    df.take(5)
+    // One partition executed, local limit pushed into partition.
+    expectedMetricValue = 5
+    expectedSLAMValue = 5
+    assert(withRetries || slam.value === expectedMetricValue)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(expectedSLAMValue))
+    // take(5) actually inline creates a new Dataset, with new executed plan
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+
+    df.take(50)
+    // One partition executed, local limit pushed into partition.
+    expectedMetricValue += 50
+    expectedSLAMValue = 50
+    assert(withRetries || slam.value === expectedMetricValue)
+    // New SQL plan creates new RDDs, so this is seen as new execution.
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(expectedSLAMValue))
+    assert(slam.getNumRDDs === 2)
+    assert(slam.lastAttemptValueForAllRDDs() === Some(expectedMetricValue))
+    // take(50) executes a different inline Dataset and plan.
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+
+    df.take(220)
+    // Three partitions executed.
+    expectedMetricValue += 300
+    expectedSLAMValue = 300
+    assert(withRetries || slam.value === expectedMetricValue)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(expectedSLAMValue))
+    assert(slam.getNumRDDs === 3)
+    assert(slam.lastAttemptValueForAllRDDs() === Some(expectedMetricValue))
+    // take(220) executes a different inline Dataset and plan.
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+
+    df.take(320)
+    // Five partitions executed.
+    expectedMetricValue += 500
+    expectedSLAMValue = 500
+    assert(withRetries || slam.value === expectedMetricValue)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(expectedSLAMValue))
+    assert(slam.getNumRDDs === 4)
+    assert(slam.lastAttemptValueForAllRDDs() === Some(expectedMetricValue))
+    // take(320) executes a different inline Dataset and plan.
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+
+    df.take(1)
+    // One partition scanned, local limit pushed into partition.
+    expectedMetricValue += 1
+    expectedSLAMValue = 1
+    assert(withRetries || slam.value === expectedMetricValue)
+    // New RDD, so the value from new execution is back to 1.
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(expectedSLAMValue))
+    assert(slam.getNumRDDs === 5)
+    assert(slam.lastAttemptValueForAllRDDs() === Some(expectedMetricValue))
+    // take(1) executes a different inline Dataset and plan.
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+
+    logInfo(slam.logAccumulatorState)
+  }
+
+  test("driver set value") {
+    val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+    slam.set(10)
+
+    // Regular metric value
+    assert(withRetries || slam.value === 10)
+
+    assert(slam.getDirectDriverValue === Some(10))
+    // "Driver update" is returned under "highest" and "all" RDDs
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(10))
+    assert(slam.lastAttemptValueForAllRDDs() === Some(10))
+    assert(slam.getNumRDDs === 0)
+    // When specific RDDs are requested, driver value is not returned.
+    assert(slam.lastAttemptValueForRDDId(42) === Some(0))
+    assert(slam.lastAttemptValueForRDDIds(Seq(7, 42)) === Some(0))
+
+    // Incrementing works
+    slam.add(5)
+    assert(withRetries || slam.value === 15)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(15))
+    assert(slam.lastAttemptValueForAllRDDs() === Some(15))
+    assert(slam.getDirectDriverValue === Some(15))
+
+    // Negative increments are ignored by SQLMetric
+    slam.add(-3)
+    assert(withRetries || slam.value === 15)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(15))
+    assert(slam.lastAttemptValueForAllRDDs() === Some(15))
+    assert(slam.getDirectDriverValue === Some(15))
+
+    // Reset does not reset SLAM.
+    slam.reset()
+    assert(withRetries || slam.value === 0)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(15))
+    assert(slam.lastAttemptValueForAllRDDs() === Some(15))
+    assert(slam.getDirectDriverValue === Some(15))
+
+    // Setting it back...
+    slam.set(20)
+    assert(withRetries || slam.value === 20)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(20))
+    assert(slam.lastAttemptValueForAllRDDs() === Some(20))
+    assert(slam.getDirectDriverValue === Some(20))
+    assert(slam.getNumRDDs === 0)
+
+    val df = spark.range(10).filter(Column(incrementMetric(slam)))
+    // SLAM was not executed in this Dataset, the driver value set manually
+    // before should not be returned.
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+    assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === None)
+    df.collect()
+    assert(withRetries || slam.value === 30)
+    // SLAM bails out when it sees both driver and executor values
+    assert(slam.lastAttemptValueForHighestRDDId() === None)
+    assert(slam.lastAttemptValueForAllRDDs() === None)
+    assert(slam.lastAttemptValueForRDDId(42) === None)
+    assert(slam.lastAttemptValueForRDDIds(Seq(7, 42)) === None)
+    assert(slam.lastAttemptValueForDataset(df) === None)
+    assert(!slam.getValid)
+    assert(slam.getNumRDDs === 0) // invalidated before RDD got recorded
+    assert(slam.getDirectDriverValue === Some(20))
+    // Invalidated before QueryExecution value was recorded.
+    assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === None)
+
+    slam.reset()
+    slam.set(10)
+    assert(withRetries || slam.value === 10)
+    // SLAM stays bailed out.
+    assert(slam.lastAttemptValueForHighestRDDId() === None)
+    assert(slam.lastAttemptValueForAllRDDs() === None)
+    assert(slam.lastAttemptValueForRDDId(42) === None)
+    assert(slam.lastAttemptValueForRDDIds(Seq(7, 42)) === None)
+    assert(slam.lastAttemptValueForDataset(df) === None)
+    assert(!slam.getValid)
+    // SLAM info doesn't get updated anymore when invalid, but stays around for debugging purposes.
+    assert(slam.getNumRDDs === 0)
+    assert(slam.getDirectDriverValue === Some(20))
+    assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === None)
+
+    logInfo(slam.logAccumulatorState)
+
+    // resetLastAttemptAccumulator resets it and makes it valid to be used again.
+    slam.resetLastAttemptAccumulator()
+    assert(slam.getValid)
+    slam.set(42)
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(42))
+    assert(slam.getDirectDriverValue === Some(42))
+    assert(slam.getNumRDDs === 0)
+    assert(slam.lastAttemptValueForDataset(df) === Some(0))
+  }
+
+  test("ConvertToLocalRelation direct driver execution") {
+    // Normally ConvertToLocalRelation is disabled in tests.
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
+      val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+      val df = Seq(1, 2, 3).toDF("a").filter(Column(incrementMetric(slam)))
+
+      // SLAM is executed on the driver in the Optimized by ConvertToLocalRelation
+      df.collect()
+      assert(slam.lastAttemptValueForAllRDDs() === Some(3))
+      assert(slam.lastAttemptValueForHighestRDDId() === Some(3))
+      assert(slam.getDirectDriverValue === Some(3))
+      // SLAM recognizes it was executed on the driver
+      // in the scope of the QueryExecution of this Dataset.
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === Some(3))
+
+      // Second action does not re-execute Optimizer.
+      df.collect()
+      assert(slam.lastAttemptValueForAllRDDs() === Some(3))
+      assert(slam.lastAttemptValueForHighestRDDId() === Some(3))
+      assert(slam.getDirectDriverValue === Some(3))
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+
+      // Limitation: When a new Dataset is built and Optimizer reexecutes ConvertToLocalRelation,
+      // SLAM RDD retrieval cannot reason about re-execution on the driver,
+      // leading to duplicated metrics.
+      val df2 = df.withColumn("foo", Column(Literal("foo")))
+      df2.collect()
+      assert(slam.lastAttemptValueForAllRDDs() === Some(6))
+      assert(slam.lastAttemptValueForHighestRDDId() === Some(6))
+      assert(slam.getDirectDriverValue === Some(6))
+      // But it recognizes that it is done in a new QueryExecution and is able to distinguish that
+      // without duplicates.
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      assert(slam.lastAttemptValueForDataset(df2) === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df2.queryExecution.id.toString) === Some(3))
+
+      // No RDD executions were recorded.
+      assert(slam.getNumRDDs === 0)
+
+      logInfo(slam.logAccumulatorState)
+    }
+  }
+
+  test("ConvertToLocalRelation manual optimizer triggering") {
+    // Normally ConvertToLocalRelation is disabled in tests.
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
+      val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+      val df = Seq(1, 2, 3).toDF("a").filter(Column(incrementMetric(slam)))
+      // Trigger the optimizer manually, which will trigger ConvertToLocalRelation
+      df.queryExecution.assertOptimized()
+
+      // SLAM recognizes it was executed on the driver
+      // in the scope of the QueryExecution of this Dataset.
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === Some(3))
+
+      // Repeated actions do not re-execute Optimizer.
+      df.collect()
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      df.collect()
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+
+      logInfo(slam.logAccumulatorState)
+    }
+  }
+
+  test("ConvertToLocalRelation in explain") {
+    // Normally ConvertToLocalRelation is disabled in tests.
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
+      val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+      val df = Seq(1, 2, 3).toDF("a").filter(Column(incrementMetric(slam)))
+
+      // EXPLAIN triggers the optimizer and triggered ConvertToLocalRelation to execute
+      df.explain(true)
+      assert(withRetries || slam.value === 3)
+      assert(slam.lastAttemptValueForAllRDDs() === Some(3))
+      assert(slam.lastAttemptValueForHighestRDDId() === Some(3))
+      assert(slam.getDirectDriverValue === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === Some(3))
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      // Retriggering EXPLAIN does not cause duplicates
+      df.explain(true)
+      assert(withRetries || slam.value === 3)
+      assert(slam.lastAttemptValueForAllRDDs() === Some(3))
+      assert(slam.lastAttemptValueForHighestRDDId() === Some(3))
+      assert(slam.getDirectDriverValue === Some(3))
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === Some(3))
+
+      // Execution does not re-execute Optimizer and does not duplicate metric.
+      df.collect()
+      assert(withRetries || slam.value === 3)
+      assert(slam.lastAttemptValueForAllRDDs() === Some(3))
+      assert(slam.lastAttemptValueForHighestRDDId() === Some(3))
+      assert(slam.getDirectDriverValue === Some(3))
+      assert(slam.lastAttemptValueForDataset(df) === Some(3))
+      assert(slam.getDirectDriverQueryExecutionValue(df.queryExecution.id.toString) === Some(3))
+
+      // No RDD executions were recorded.
+      assert(slam.getNumRDDs === 0)
+
+      logInfo(slam.logAccumulatorState)
+    }
+  }
+
+  test("BroadcastNestedLoopJoin outer executes probe side twice") {
+    val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+    val build =
+      spark.range(5).selectExpr("id as b").hint("broadcast")
+    val probe = spark.range(100).selectExpr("id as p").filter(Column(incrementMetric(slam)))
+    val df = probe.join(build, usingColumns = Seq(), joinType = "rightouter")
+    df.collect()
+    assert(AdaptiveSparkPlanHelper.exists(df.queryExecution.executedPlan) {
+      case BroadcastNestedLoopJoinExec(_, _, BuildRight, RightOuter, None) => true
+      case _ => false
+    })
+    // When build side is outer, probe side gets executed twice by BNLJ:
+    // once for matches, and once to mark unmatched build rows.
+    // This is a non-determinism correctness issue, and the two executions
+    // should not be double-counted in the last attempt value.
+    assert(slam.getNumRDDs === 2)
+    assert(slam.lastAttemptValueForAllRDDs() === Some(200))
+    // The two executions are different RDDs, but only one of them is highest id.
+    assert(slam.lastAttemptValueForHighestRDDId() === Some(100))
+    // Dataset dedups per scope and returns only the latest RDD's value.
+    assert(slam.lastAttemptValueForDataset(df) === Some(100))
+  }
+
+  test("SLAM with AQE CoalesceShufflePartitions") {
+    // Adapted from tests in CoalesceShufflePartitionsSuite
+
+    val stage1Slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM2")
+    val stage2Slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "SLAM2")
+    val stage1MetricExpr = Column(incrementMetric(stage1Slam))
+    val stage2MetricExpr = Column(incrementMetric(stage2Slam))
+
+    // Dataframe with a SLAM before and after a shuffle.
+    val df = spark.range(0, 1000, 1, numPartitions = 10)
+      .selectExpr("id % 20 as key", "id as value")
+      .filter(stage1MetricExpr)
+      .groupBy("key").count()
+      .filter(stage2MetricExpr)
+
+    withSQLConf(
+      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
+      SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "2000") {
+      df.collect()
+    }
+
+    // Verify the AQE coalescing happened and coalesced the shuffle into 3 partitions.
+    // (based on ADVISORY_PARTITION_SIZE_IN_BYTES config)
+    val finalPlan = AdaptiveSparkPlanHelper.stripAQEPlan(df.queryExecution.executedPlan)
+    val shuffleReads = finalPlan.collect {
+      case r @ CoalescedShuffleRead() => r
+    }
+    assert(shuffleReads.nonEmpty)
+    shuffleReads.foreach { read =>
+      // check there is actual coalescing of partitions happening
+      assert(read.isCoalescedRead)
+      assert(read.partitionSpecs.exists {
+        case p: CoalescedPartitionSpec if p.startReducerIndex < p.endReducerIndex - 1 => true
+        case _ => false
+      })
+    }
+
+    // Verify SLAM metrics.
+    assert(stage1Slam.lastAttemptValueForHighestRDDId() === Some(1000))
+    assert(stage2Slam.lastAttemptValueForHighestRDDId() === Some(20))
+    assert(stage1Slam.lastAttemptValueForDataset(df) === Some(1000))
+    assert(stage2Slam.lastAttemptValueForDataset(df) === Some(20))
+  }
+
+  test("WholeStageCodegenExec fallback to non-codegen") {
+    withSQLConf(
+      SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true",
+      SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.key -> "1" // force fallback due to too large method
+    ) {
+      // This test is to verify that SLAM works correctly when WholeStageCodegenExec falls back
+      // to non-codegen execution.
+      val slam = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+      val df = spark
+        .range(10)
+        .filter(Column(incrementMetric(slam)))
+        // these two operators will be turned into a WholeStageCodegen,
+        .selectExpr("id + 1 as foo", "id + 2 as bar")
+        .filter("foo < bar")
+      df.collect()
+      assert(slam.lastAttemptValueForDataset(df) === Some(10))
+      // Metric is attributed to the child of the WSCG node.
+      val wscg = df.queryExecution.executedPlan.collectFirst {
+        case w: WholeStageCodegenExec => w
+      }
+      assert(wscg.isDefined)
+      assert(slam.getHighestRDDId.isDefined)
+    }
+  }
+}
+
+class SQLLastAttemptMetricIntegrationSuiteWithStageRetries
+    extends SQLLastAttemptMetricIntegrationSuite {
+  override protected def withRetries = true
+
+  override protected def test(
+      testName: String,
+      testTags: org.scalatest.Tag*)
+      (testFun: => Any)
+      (implicit pos: org.scalactic.source.Position): Unit = {
+    super.test(testName, testTags : _*) {
+      withSparkContextConf(config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> "true") {
+        // Stage retries should not affect SLAM metrics.
+        testFun
+      }
+    }(pos)
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricPlanShapesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricPlanShapesSuite.scala
new file mode 100644
index 0000000000000..ea8d9568f7e4b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricPlanShapesSuite.scala
@@ -0,0 +1,490 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.metric
+
+import scala.reflect.ClassTag
+import scala.util.Random
+
+import org.scalatest.Tag
+
+import org.apache.spark.internal.config
+import org.apache.spark.sql.execution.{CollectLimitExec, RDDScanExec, SparkPlan}
+import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, AQETestHelper, DisableAdaptiveExecutionSuite}
+import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
+import org.apache.spark.sql.execution.exchange._
+import org.apache.spark.sql.functions.udf
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+class SQLLastAttemptMetricPlanShapesSuite
+  extends SharedSparkSession
+  with SQLMetricsTestUtils
+  // Need to control AQE per-test to ensure expected plan shapes.
+  with DisableAdaptiveExecutionSuite {
+
+  import testImplicits._
+
+  import SQLLastAttemptMetricPlanShapesSuite._
+
+  // Avoid initialising this before the Spark Context is initialised.
+  protected var testSLAMetric: SQLLastAttemptMetric = _
+
+  protected def setUpTestTable(): Unit = {
+    val rand = new Random(1)
+    val randomPrefix = rand.nextString(30)
+    spark
+      .range(NUM_RECORDS)
+      .map { id =>
+        (id, (id % LOW_CARDINALITY).toInt, randomPrefix + (id % LARGE_CARDINALITY))
+      }.toDF("id", "low_cardinality_col", "large_col")
+      .write.format("parquet").saveAsTable(TABLE_NAME)
+    val numRecords = spark.read.table(TABLE_NAME).count()
+    assert(numRecords === 300)
+  }
+
+  override protected def beforeAll(): Unit = {
+    super.beforeAll()
+    setUpTestTable()
+    testSLAMetric = SQLLastAttemptMetrics.createMetric(spark.sparkContext, "test SLAM")
+    // Move this into a local field so the closure doesn't hang on to the whole `this`
+    // reference as well.
+    val metric = testSLAMetric
+    val incrementMetric = () => { metric += 1; true }
+    val incrementMetricUdf = udf(incrementMetric).asNondeterministic()
+    spark.udf.register("increment_metric", incrementMetricUdf)
+  }
+
+  override protected def afterAll(): Unit = {
+    spark.sql(s"DROP TABLE IF EXISTS $TABLE_NAME")
+    super.afterAll()
+  }
+
+  override protected def beforeEach(): Unit = {
+    super.beforeEach()
+    // note: reset() does not influence lastAttemptValue, but influences regular value
+    testSLAMetric.reset()
+  }
+
+  object MetricValue {
+    type Check = Option[Long] => Unit
+
+    // Having the asserts in these helpers instead of in testPhysicalPlanShape
+    // produces better error messages.
+    def exactly(expectedValue: Long): Check = actualValue =>
+      assert(actualValue === Some(expectedValue))
+
+    def atLeast(minimumValue: Long): Check = { actualValue =>
+      assert(actualValue.isDefined)
+      assert(actualValue.get >= minimumValue)
+    }
+  }
+
+  object PhysicalPlan {
+    type Check = SparkPlan => Unit
+
+    val ANY: Check = _ => () //  Ignore.
+
+    def contains[T <: SparkPlan: ClassTag](implicit cls: ClassTag[T]): Check = { plan =>
+      val existsSomeNodeOfTypeT =
+        AdaptiveSparkPlanHelper.existsWithSubqueries(plan)(_.getClass == cls.runtimeClass)
+      assert(
+        existsSomeNodeOfTypeT,
+        s"Expected a node ${cls.runtimeClass.getSimpleName}. Actual Plan:\n${plan.treeString}")
+    }
+
+    def exists(pf: PartialFunction[SparkPlan, Boolean]): Check = { plan =>
+      val existsMatchingNode =
+        AdaptiveSparkPlanHelper.existsWithSubqueries(plan)(pf.lift(_).getOrElse(false))
+      assert(
+        existsMatchingNode,
+        s"Unexpected plan (check match function). Actual Plan:\n${plan.treeString}")
+    }
+
+    def isAQE: Boolean = SQLConf.get.getConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED)
+
+    def hasStageRetries: Boolean = spark.sparkContext.conf
+      .getOption(config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key).contains("true")
+
+    def hasAQEReplans: Boolean = AQETestHelper.isForcedCancellationEnabled
+  }
+
+  protected def testPhysicalPlanShape(
+      label: String,
+      setup: () => Unit = () => (),
+      extraSQLConfs: Map[String, String] = Map.empty,
+      sqlQuery: String,
+      executedPlanCheck: PhysicalPlan.Check,
+      metricValueCheck: MetricValue.Check
+  )(testTags: Tag*): Unit = {
+    for {
+      useAQE <- BOOLEAN_DOMAIN
+      stageRetries <- BOOLEAN_DOMAIN
+      aqeReplans <- if (useAQE) BOOLEAN_DOMAIN else Seq(false)
+    } test(s"$label - " +
+        s"useAQE=$useAQE, stageRetries=$stageRetries, aqeReplans=$aqeReplans",
+        testTags: _*) {
+
+      // There is some special handling for df.cache() / df.persist() / df.localCheckpoint() tests.
+      val cachedPlanTest = label.startsWith("cache - ")
+
+      withSQLConf(
+          SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> useAQE.toString) {
+        setup()
+        withSQLConf(extraSQLConfs.toSeq: _*) {
+          val aqeRetryMetrics = if (aqeReplans) Seq(testSLAMetric) else Seq.empty
+          AQETestHelper.withForcedCancellation(aqeRetryMetrics: _*) {
+            withSparkContextConf(
+                config.Tests.INJECT_SHUFFLE_FETCH_FAILURES.key -> stageRetries.toString) {
+              val resultDf = spark.sql(sqlQuery)
+              val _ = resultDf.collect()
+
+              // normal value of the metrics shall not work with retries or replans
+              if (!stageRetries && !aqeReplans) {
+                metricValueCheck(Some(testSLAMetric.value))
+              }
+              // test LastRDDValue
+              metricValueCheck(testSLAMetric.lastAttemptValueForHighestRDDId())
+              // test Dataset value
+              if (!cachedPlanTest) {
+                // SLAM.lastAttemptValueForDataset is undefined when SLAM is inside
+                // cached or checkpointed plan.
+                metricValueCheck(testSLAMetric.lastAttemptValueForDataset(resultDf))
+              }
+              // test expected plan shape
+              val executedPlan = resultDf.queryExecution.executedPlan
+              executedPlanCheck(executedPlan)
+              val rddIdExec = testSLAMetric.getHighestRDDId
+
+              // Repeated execution should not affect SLAM metric value
+              resultDf.collect()
+              // test LastRDDValue again
+              metricValueCheck(testSLAMetric.lastAttemptValueForHighestRDDId())
+              // test Dataset value again
+              if (!cachedPlanTest) {
+                // SLAM.lastAttemptValueForDataset is undefined when SLAM is inside
+                // cached or checkpointed plan.
+                metricValueCheck(testSLAMetric.lastAttemptValueForDataset(resultDf))
+              }
+
+              // count() transformation creates a new Dataset.
+              // It should not affect the SLAM metric value of the first Dataset.
+              resultDf.count()
+              // test Dataset value again
+              if (!cachedPlanTest) {
+                // SLAM.lastAttemptValueForDataset is undefined when SLAM is inside
+                // cached or checkpointed plan.
+                metricValueCheck(testSLAMetric.lastAttemptValueForDataset(resultDf))
+              }
+              // This should have created a new plan and executed new RDDs,
+              // unless it's a test of cached plan.
+              val rddIdExecCount = testSLAMetric.getHighestRDDId
+              if (cachedPlanTest) {
+                assert(rddIdExecCount === rddIdExec)
+              } else {
+                // count() creates a new plan with new RDDs.
+                assert(rddIdExecCount.get > rddIdExec.get)
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  protected def testPlanShape(
+      label: String,
+      sqlQuery: String,
+      // Assert on the result of the test metric.
+      metricValueCheck: MetricValue.Check,
+      testTags: Tag*
+  ): Unit = {
+    testPhysicalPlanShape(
+      label = label,
+      sqlQuery = sqlQuery,
+      executedPlanCheck = PhysicalPlan.ANY,
+      metricValueCheck = metricValueCheck
+    )(testTags: _*)
+  }
+
+  testPlanShape(
+    label = "simple plan",
+    sqlQuery = s"SELECT * FROM $TABLE_NAME WHERE increment_metric()",
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS)
+  )
+
+  /* ********************
+  * Various Subquery Plans
+  * ********************** */
+  testPlanShape(
+    label = "subquery - IN",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE id IN (
+         |   SELECT low_cardinality_col
+         |   FROM $TABLE_NAME
+         |   WHERE increment_metric())""".stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS)
+  )
+
+  testPlanShape(
+    label = "subquery - IN - aggregation",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE id IN (
+         |   SELECT DISTINCT(low_cardinality_col)
+         |   FROM $TABLE_NAME
+         |   WHERE increment_metric())""".stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS)
+  )
+
+  testPlanShape(
+    label = "subquery - IN - TVF",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE id IN (
+         |   SELECT *
+         |   FROM range(5)
+         |   WHERE increment_metric())""".stripMargin,
+    metricValueCheck = MetricValue.exactly(5)
+  )
+
+  testPlanShape(
+    label = "subquery - IN - explode",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE id IN (
+         |   SELECT explode(array(low_cardinality_col, low_cardinality_col + 1))
+         |   FROM $TABLE_NAME
+         |   WHERE increment_metric())""".stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS)
+  )
+
+  testPlanShape(
+    label = "subquery - IN - lateral view explode",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE id IN (
+         |   SELECT new_column
+         |   FROM $TABLE_NAME LATERAL VIEW
+         |     explode(array(low_cardinality_col, low_cardinality_col + 1)) AS new_column
+         |   WHERE increment_metric())""".stripMargin,
+    metricValueCheck = MetricValue.exactly(2 * NUM_RECORDS)
+  )
+
+  testPlanShape(
+    label = "subquery - scalar",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE id == (
+         |   SELECT MAX(low_cardinality_col)
+         |   FROM $TABLE_NAME
+         |   WHERE increment_metric())""".stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS)
+  )
+
+  testPhysicalPlanShape(
+    label = "subquery - EXISTS",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME
+         | WHERE EXISTS (
+         |   SELECT low_cardinality_col
+         |   FROM $TABLE_NAME
+         |   WHERE increment_metric())""".stripMargin,
+    // This turns into a LIMIT query.
+    metricValueCheck = MetricValue.atLeast(1),
+    executedPlanCheck = PhysicalPlan.contains[CollectLimitExec]
+  )()
+
+  testPhysicalPlanShape(
+    label = "subquery - EXISTS (correlated)",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME outer_table
+         | WHERE EXISTS (
+         |   SELECT low_cardinality_col
+         |   FROM $TABLE_NAME inner_table
+         |   WHERE increment_metric()
+         |     AND inner_table.low_cardinality_col == outer_table.low_cardinality_col)
+         |     """.stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+    executedPlanCheck = PhysicalPlan.exists {
+      case _: BroadcastExchangeExec => true
+      case _: ShuffleExchangeExec => true
+      case _: ReusedExchangeExec => true
+    }
+  )()
+
+  /* *****************************
+  * Plans with different Exchanges
+  * ****************************** */
+
+  /*
+   * To cover:
+   * - ShuffleExchangeLike
+   *   - ShuffleExchangeExec: covered by exchange - Shuffle
+   * - ReusedExchangeExec: covered by exchange - ReusedExchangeExec
+   * - BroadcastExchangeLike:
+   *   - BroadcastExchangeExec: covered above by subquery - EXISTS (correlated))
+   * - InMemoryTableScanLike (InMemoryTableScanExec): covered by exchange - InMemoryTableScanExec
+   */
+
+  testPhysicalPlanShape(
+    label = "exchange - Shuffle",
+    sqlQuery =
+      s"""SELECT *
+         | FROM $TABLE_NAME orig
+         | FULL OUTER JOIN (
+         |   SELECT *
+         |   FROM $TABLE_NAME
+         |   WHERE increment_metric()
+         | ) with_metric USING (id)""".stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+    executedPlanCheck = PhysicalPlan.exists {
+      case _: ShuffleExchangeExec => true
+      // After forced AQE replans it may use ReusedExchange.
+      case _: ReusedExchangeExec if PhysicalPlan.hasAQEReplans => true
+    }
+  )()
+
+  testPhysicalPlanShape(
+    label = "exchange - ReusedExchangeExec",
+    sqlQuery =
+      s"""WITH subquery_with_metric AS (
+        |   SELECT *
+        |   FROM $TABLE_NAME
+        |   WHERE increment_metric()
+        | )
+        |SELECT *
+        | FROM subquery_with_metric a JOIN subquery_with_metric b USING (id)""".stripMargin,
+    metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+    executedPlanCheck = PhysicalPlan.contains[ReusedExchangeExec]
+  )()
+
+  for (eager <- Seq("true", "false", "manual")) {
+    // SLAM metric in the top stage of cached query.
+    testPhysicalPlanShape(
+      label = s"cache - InMemoryTableScanExec - result stage - eager=$eager",
+      setup = () => {
+        spark.sql(s"""
+           |CREATE OR REPLACE TEMP VIEW table_with_metric AS (
+           |  SELECT low_cardinality_col
+           |  FROM $TABLE_NAME
+           |  WHERE increment_metric()
+           |)""".stripMargin)
+        if (eager == "true") {
+          spark.sql("CACHE TABLE table_with_metric")
+        } else { // false or manual
+          spark.sql("CACHE LAZY TABLE table_with_metric")
+        }
+        if (eager == "manual") {
+          spark.sql("select count(*) from table_with_metric").collect()
+        }
+      },
+      sqlQuery =
+        s"""SELECT *
+           | FROM $TABLE_NAME
+           | WHERE id IN (SELECT * FROM table_with_metric)""".stripMargin,
+      metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+      executedPlanCheck = PhysicalPlan.contains[InMemoryTableScanExec]
+    )()
+
+    // SLAM metric in the map stage of cached query.
+    testPhysicalPlanShape(
+      label = s"cache - InMemoryTableScanExec - map stage - eager=$eager",
+      setup = () => {
+        spark.sql(s"""
+          |CREATE OR REPLACE TEMP VIEW table_with_metric AS (
+          |  SELECT id, SUM(low_cardinality_col)
+          |  FROM $TABLE_NAME
+          |  WHERE increment_metric()
+          |  GROUP BY id
+          |)""".stripMargin)
+        if (eager == "true") {
+          spark.sql("CACHE TABLE table_with_metric")
+        } else { // false or manual
+          spark.sql("CACHE LAZY TABLE table_with_metric")
+        }
+        if (eager == "manual") {
+          spark.sql("select count(*) from table_with_metric").collect()
+        }
+      },
+      sqlQuery =
+        s"""SELECT *
+           | FROM table_with_metric""".stripMargin,
+      metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+      executedPlanCheck = PhysicalPlan.contains[InMemoryTableScanExec]
+    )()
+
+    testPhysicalPlanShape(
+      label = s"cache - localCheckpoint - result stage - eager=$eager",
+      setup = () => {
+        val df = spark.sql(s"""
+            |SELECT low_cardinality_col
+            |FROM $TABLE_NAME
+            |WHERE increment_metric()""".stripMargin)
+        val cpEager = if (eager == "true") true else false
+        val cpDf = df.localCheckpoint(eager = cpEager)
+        if (eager == "manual") {
+          cpDf.count()
+        }
+        cpDf.createOrReplaceTempView("cp_table_with_metric")
+      },
+      sqlQuery =
+        s"""SELECT *
+           | FROM cp_table_with_metric""".stripMargin,
+      metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+      executedPlanCheck = PhysicalPlan.contains[RDDScanExec]
+    )()
+
+    testPhysicalPlanShape(
+      label = s"cache - localCheckpoint - map stage - eager=$eager",
+      setup = () => {
+        val df = spark.sql(s"""
+          |SELECT id, SUM(low_cardinality_col)
+          |FROM $TABLE_NAME
+          |WHERE increment_metric()
+          |GROUP BY id""".stripMargin)
+        val cpEager = if (eager == "true") true else false
+        val cpDf = df.localCheckpoint(eager = cpEager)
+        if (eager == "manual") {
+          cpDf.count()
+        }
+        cpDf.createOrReplaceTempView("cp_table_with_metric")
+      },
+      sqlQuery =
+        s"""SELECT *
+           | FROM cp_table_with_metric""".stripMargin,
+      metricValueCheck = MetricValue.exactly(NUM_RECORDS),
+      executedPlanCheck = PhysicalPlan.contains[RDDScanExec]
+    )()
+  }
+}
+
+object SQLLastAttemptMetricPlanShapesSuite {
+  val NUM_RECORDS: Long = 300
+  val LOW_CARDINALITY: Int = 5
+  val LARGE_CARDINALITY: Int = 111
+
+  val TABLE_NAME: String = "test_table"
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricUnitSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricUnitSuite.scala
new file mode 100644
index 0000000000000..45dff0fe4aa91
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLLastAttemptMetricUnitSuite.scala
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.metric
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
+import java.util.Properties
+
+import org.mockito.Mockito.when
+import org.scalatestplus.mockito.MockitoSugar.mock
+
+import org.apache.spark.{SharedSparkContext, SparkFunSuite}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.scheduler.TaskInfo
+
+/** Tests internals of [[SQLLastAttemptMetric]]. */
+class SQLLastAttemptMetricUnitSuite extends SparkFunSuite with SharedSparkContext {
+
+  // scalastyle:off classforname
+  private val sqlLastAttemptMetricClass = Class
+    .forName("org.apache.spark.sql.execution.metric.SQLLastAttemptMetric")
+  // scalastyle:on classforname
+
+  private val lastAttemptInitializedField =
+    sqlLastAttemptMetricClass.getDeclaredField("lastAttemptAccumulatorInitialized")
+
+  private val lastAttemptRddsMapField =
+    sqlLastAttemptMetricClass.getDeclaredField(
+      "org$apache$spark$util$LastAttemptAccumulator$$lastAttemptRddsMap")
+
+  private val directDriverValueField =
+    sqlLastAttemptMetricClass.getDeclaredField(
+      "org$apache$spark$util$LastAttemptAccumulator$$lastAttemptDirectDriverValue")
+
+  private val partialMergeValMethod = sqlLastAttemptMetricClass.getMethod("partialMergeVal")
+
+  private val mockRdd = mock[RDD[_]]
+  private val mockTaskInfo = mock[TaskInfo]
+  private val mockProperties = new Properties
+
+  // Set mock attempt for mock Task, TaskInfo and RDD
+  // that can be used with mergeLastAttempt.
+  // stageId and stageAttemptId are passed directly to mergeLastAttempt.
+  def setMockAttempt(rddId: Int, partitionId: Int): Unit = {
+    // reset to mock defaults
+    when(mockTaskInfo.attemptNumber).thenReturn(0)
+    when(mockRdd.scope).thenReturn(None)
+    when(mockRdd.getNumPartitions).thenReturn(5)
+
+    when(mockRdd.id).thenReturn(rddId)
+    when(mockTaskInfo.partitionId).thenReturn(partitionId)
+  }
+
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    lastAttemptInitializedField.setAccessible(true)
+    lastAttemptRddsMapField.setAccessible(true)
+    directDriverValueField.setAccessible(true)
+    partialMergeValMethod.setAccessible(true)
+  }
+
+  override def afterAll(): Unit = {
+    lastAttemptInitializedField.setAccessible(false)
+    lastAttemptRddsMapField.setAccessible(false)
+    directDriverValueField.setAccessible(false)
+    partialMergeValMethod.setAccessible(false)
+    super.afterAll()
+  }
+
+  test("serialization and deserialization") {
+    val slam = SQLLastAttemptMetrics.createMetric(sc, "test SLAM")
+
+    assert(lastAttemptInitializedField.getBoolean(slam) === true)
+    assert(lastAttemptRddsMapField.get(slam) != null)
+    assert(directDriverValueField.get(slam) != null)
+
+    // Serialize slam to ObjectOutputStream and deserialize it back.
+    val obs1 = new ByteArrayOutputStream()
+    val oos1 = new ObjectOutputStream(obs1)
+    oos1.writeObject(slam)
+    oos1.close()
+    val ois1 = new ObjectInputStream(new ByteArrayInputStream(obs1.toByteArray))
+    val deser = ois1.readObject().asInstanceOf[SQLLastAttemptMetric]
+
+    // serialized version should not be initialized
+    assert(lastAttemptInitializedField.getBoolean(deser) === false)
+    assert(lastAttemptRddsMapField.get(deser) == null)
+    assert(directDriverValueField.get(deser) == null)
+
+    deser.set(42)
+    deser.add(7)
+    assert(deser.value === 49)
+    // these functions shouldn't be used on the deserialized metric,
+    // but assertions should be caught and None should be returned.
+    assert(deser.lastAttemptValueForHighestRDDId() === None)
+    assert(deser.lastAttemptValueForRDDId(1) === None)
+    assert(deser.lastAttemptValueForRDDIds(Seq(1, 2, 3)) === None)
+    assert(deser.lastAttemptValueForAllRDDs() === None)
+    // mergeLastAttempt shouldn't be used on the deserialized metric,
+    // but it should catch error and not fail.
+    deser.mergeLastAttempt(slam, null, null, 0, 0, null)
+
+    // Serialize and deserialize again.
+    val obs2 = new ByteArrayOutputStream()
+    val oos2 = new ObjectOutputStream(obs2)
+    oos2.writeObject(deser)
+    oos2.close()
+    val ois2 = new ObjectInputStream(new ByteArrayInputStream(obs2.toByteArray))
+    val reser = ois2.readObject().asInstanceOf[SQLLastAttemptMetric]
+    // Check that the value is brought back and can be used as partialMergeVal.
+    assert(reser.value === 49L)
+    assert(partialMergeValMethod.invoke(reser) === 49L)
+  }
+
+  test("copy and mergeLastAttempt") {
+    val slam = SQLLastAttemptMetrics.createMetric(sc, "test SLAM")
+
+    assert(lastAttemptInitializedField.getBoolean(slam) == true)
+    assert(lastAttemptRddsMapField.get(slam) != null)
+    assert(directDriverValueField.get(slam) != null)
+
+    // copy should not initialize SLAM data.
+    val acc = slam.copy()
+    assert(lastAttemptInitializedField.getBoolean(acc) == false)
+    assert(lastAttemptRddsMapField.get(acc) == null)
+    assert(directDriverValueField.get(acc) == null)
+    // these functions shouldn't be used on the copy,
+    // but assertions should be caught and None should be returned.
+    assert(acc.lastAttemptValueForHighestRDDId() === None)
+    assert(acc.lastAttemptValueForRDDId(1) === None)
+    assert(acc.lastAttemptValueForRDDIds(Seq(1, 2, 3)) === None)
+    assert(acc.lastAttemptValueForAllRDDs() === None)
+    // mergeLastAttempt shouldn't be used on the copy,
+    // but it should catch error and not fail.
+    acc.mergeLastAttempt(slam, null, null, 0, 0, null)
+
+    // Let's play with merging acc into slam.
+    setMockAttempt(rddId = 1, partitionId = 0)
+    acc.set(10)
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 10, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(10))
+
+    setMockAttempt(rddId = 1, partitionId = 1)
+    acc.set(10) // new partition id
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 10, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(20)) // 10 + 10, aggregated new partition id
+
+    setMockAttempt(rddId = 1, partitionId = 1)
+    acc.set(7) // same partition id, older attempt.
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 9, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(20)) // no change
+
+    setMockAttempt(rddId = 1, partitionId = 1)
+    acc.set(7) // same partition id, older stage.
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 9, 11, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(20)) // no change
+
+    setMockAttempt(rddId = 1, partitionId = 1)
+    acc.set(7) // same partition id, newer attempt.
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 11, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(17)) // 10 replaced with 7
+
+    setMockAttempt(rddId = 1, partitionId = 1)
+    acc.set(8) // same partition id, newer stage.
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 11, 1, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(18)) // 7 replaced with 8
+
+    setMockAttempt(rddId = 2, partitionId = 2)
+    acc.set(42) // new RDD
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 1, 1, mockProperties)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(18)) // no change for rddId=1
+    assert(slam.lastAttemptValueForRDDId(2) === Some(42)) // new RDD added
+    assert(slam.lastAttemptValueForAllRDDs() === Some(60))
+  }
+
+  test("compact storage: per-component override arrays allocated only when component diverges") {
+    val slam = SQLLastAttemptMetrics.createMetric(sc, "test SLAM")
+    val acc = slam.copy()
+
+    val rddsMap = lastAttemptRddsMapField.get(slam)
+    val mapGetMethod = rddsMap.getClass.getMethod("get", classOf[Object])
+
+    // Establish a common attempt across all 5 mock partitions.
+    for (partId <- 0 until 5) {
+      setMockAttempt(rddId = 1, partitionId = partId)
+      acc.set(10)
+      slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 10, mockProperties)
+    }
+    val rddVals = mapGetMethod.invoke(rddsMap, Int.box(1)).asInstanceOf[Option[Object]].get
+    val rddValsClass = rddVals.getClass
+    // @specialized var fields are emitted with `org$apache$spark$util$LastAttemptRDDVals$$`
+    // prefix so specialized subclasses can override them.
+    val fieldPrefix = "org$apache$spark$util$LastAttemptRDDVals$$"
+    val commonStageIdFld = rddValsClass.getDeclaredField(fieldPrefix + "commonStageId")
+    commonStageIdFld.setAccessible(true)
+    val commonStageAttemptIdFld =
+      rddValsClass.getDeclaredField(fieldPrefix + "commonStageAttemptId")
+    commonStageAttemptIdFld.setAccessible(true)
+    val commonTaskAttemptNumberFld =
+      rddValsClass.getDeclaredField(fieldPrefix + "commonTaskAttemptNumber")
+    commonTaskAttemptNumberFld.setAccessible(true)
+    val overrideStageIdsFld = rddValsClass.getDeclaredField(fieldPrefix + "overrideStageIds")
+    overrideStageIdsFld.setAccessible(true)
+    val overrideStageAttemptIdsFld =
+      rddValsClass.getDeclaredField(fieldPrefix + "overrideStageAttemptIds")
+    overrideStageAttemptIdsFld.setAccessible(true)
+    val overrideTaskAttemptNumbersFld =
+      rddValsClass.getDeclaredField(fieldPrefix + "overrideTaskAttemptNumbers")
+    overrideTaskAttemptNumbersFld.setAccessible(true)
+
+    // No retries: common is set, none of the override arrays are allocated.
+    assert(commonStageIdFld.getInt(rddVals) === 10)
+    assert(commonStageAttemptIdFld.getInt(rddVals) === 10)
+    assert(commonTaskAttemptNumberFld.getInt(rddVals) === 0)
+    assert(overrideStageIdsFld.get(rddVals) === null)
+    assert(overrideStageAttemptIdsFld.get(rddVals) === null)
+    assert(overrideTaskAttemptNumbersFld.get(rddVals) === null)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(50))
+
+    // Pure stage-attempt retry of partition 0: only stageAttemptId diverges from the common.
+    setMockAttempt(rddId = 1, partitionId = 0)
+    acc.set(20)
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 11, mockProperties)
+    assert(overrideStageIdsFld.get(rddVals) === null,
+      "stageId still matches common; its override array should not be allocated")
+    assert(overrideStageAttemptIdsFld.get(rddVals) != null,
+      "stageAttemptId diverged; its override array should be allocated")
+    assert(overrideTaskAttemptNumbersFld.get(rddVals) === null,
+      "taskAttemptNumber still matches common; its override array should not be allocated")
+    val saIds1 = overrideStageAttemptIdsFld.get(rddVals).asInstanceOf[Array[Int]]
+    assert(saIds1.length === 5)
+    assert(saIds1(0) === 11)
+    assert(saIds1(1) === -1, "Untouched partitions should hold EMPTY_ID (= -1) sentinel")
+    assert(slam.lastAttemptValueForRDDId(1) === Some(60)) // 20 + 10*4
+
+    // Mid-stage retry of partition 1: only taskAttemptNumber diverges.
+    when(mockTaskInfo.attemptNumber).thenReturn(1)
+    when(mockRdd.id).thenReturn(1)
+    when(mockTaskInfo.partitionId).thenReturn(1)
+    acc.set(15)
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 10, 10, mockProperties)
+    assert(overrideStageIdsFld.get(rddVals) === null)
+    assert(overrideTaskAttemptNumbersFld.get(rddVals) != null,
+      "taskAttemptNumber diverged; its override array should be allocated")
+    val tans2 = overrideTaskAttemptNumbersFld.get(rddVals).asInstanceOf[Array[Int]]
+    assert(tans2(1) === 1)
+    assert(tans2(0) === -1)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(65)) // 20 + 15 + 10*3
+
+    // Cross-stage retry of partition 2 (new stageId). Now stageId also diverges.
+    when(mockTaskInfo.attemptNumber).thenReturn(0)
+    when(mockTaskInfo.partitionId).thenReturn(2)
+    acc.set(30)
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 11, 0, mockProperties)
+    assert(overrideStageIdsFld.get(rddVals) != null,
+      "stageId now diverges; its override array should be allocated")
+    val sIds3 = overrideStageIdsFld.get(rddVals).asInstanceOf[Array[Int]]
+    assert(sIds3(2) === 11)
+    assert(sIds3(0) === -1)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(85)) // 20 + 15 + 30 + 10*2
+
+    // Re-update partition 0 with a value that brings stageAttemptId back to common (10) while
+    // diverging stageId (12). Once an override array exists, every update writes its value into
+    // the slot - even when the value equals the common - so partition 0's stageAttemptId entry
+    // becomes 10 (rather than being cleared to EMPTY_ID).
+    when(mockTaskInfo.partitionId).thenReturn(0)
+    acc.set(40)
+    slam.mergeLastAttempt(acc, mockRdd, mockTaskInfo, 12, 10, mockProperties)
+    val saIds4 = overrideStageAttemptIdsFld.get(rddVals).asInstanceOf[Array[Int]]
+    assert(saIds4(0) === 10,
+      "Partition 0's stageAttemptId entry should hold the new value (which happens to equal " +
+        "the common), not EMPTY_ID")
+    val sIds4 = overrideStageIdsFld.get(rddVals).asInstanceOf[Array[Int]]
+    assert(sIds4(0) === 12)
+    assert(slam.lastAttemptValueForRDDId(1) === Some(105)) // 40 + 15 + 30 + 10*2
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
index 58457091a4a42..16cde3ef2c581 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
@@ -125,6 +125,23 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
     }
   }
 
+  test("SPARK-57313: Sample numOutputRows metric") {
+    Seq(false, true).foreach { withReplacement =>
+      Seq("false", "true").foreach { enableWholeStage =>
+        withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> enableWholeStage) {
+          val df = spark.range(0, 1000, 1, 1)
+            .sample(withReplacement = withReplacement, fraction = 0.5, seed = 1)
+          val expectedRows = df.collect().length
+          sparkContext.listenerBus.waitUntilEmpty()
+          val sample = df.queryExecution.executedPlan.collect {
+            case s: SampleExec => s
+          }
+          assert(sample.size == 1)
+          assert(sample.head.metrics("numOutputRows").value == expectedRows)
+        }
+      }
+    }
+  }
 
   test("Recursive CTEs metrics") {
     withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
@@ -739,6 +756,21 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
     }
   }
 
+  test("UnionExec.numOutputRows reports total row count under fusion") {
+    withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "true") {
+      val name = "demo_view"
+      withView(name) {
+        sql(s"CREATE OR REPLACE VIEW $name AS VALUES 1,2")
+        val union = spark.table(name).union(spark.table(name))
+        union.collect()
+        val unionExec = union.queryExecution.executedPlan.collectFirst {
+          case u: UnionExec => u
+        }.get
+        assert(unionExec.metrics("numOutputRows").value == 4L)
+      }
+    }
+  }
+
   test("writing data out metrics: parquet") {
     testMetricsNonDynamicPartition("parquet", "t1")
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala
index e8902ed6fb1a1..483d2a72637d1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala
@@ -25,14 +25,38 @@ import org.apache.spark.TestUtils
 import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
 import org.apache.spark.sql.{DataFrame, QueryTest}
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.execution.{SparkPlan, SparkPlanInfo}
 import org.apache.spark.sql.execution.ui.{SparkPlanGraph, SQLAppStatusStore}
+import org.apache.spark.sql.functions.udf
 import org.apache.spark.sql.internal.SQLConf.WHOLESTAGE_CODEGEN_ENABLED
 
 
 trait SQLMetricsTestUtils extends QueryTest {
   import testImplicits._
 
+  protected val BOOLEAN_DOMAIN: Seq[Boolean] = Seq(true, false)
+
+  /**
+   * @return An `Expression` that increments a SQL metric and
+   *         evaluates to true. Can be used in a filter.
+   */
+  protected def incrementMetric(
+      metric: SQLMetric): Expression = {
+    udf { () =>
+      { metric += 1; true }
+    }.asNondeterministic().apply().expr
+  }
+
+  /** @return An `Expression` to increment multiple SQL metrics */
+  protected def incrementMetrics(metrics: Seq[SQLMetric]): Expression = {
+    metrics.map(incrementMetric(_)).fold(
+      org.apache.spark.sql.catalyst.expressions.Literal(true): Expression) {
+      (acc, incrMetric) =>
+        org.apache.spark.sql.catalyst.expressions.And(acc, incrMetric)
+    }
+  }
+
   protected def currentExecutionIds(): Set[Long] = {
     spark.sparkContext.listenerBus.waitUntilEmpty(10000)
     statusStore.executionsList().map(_.executionId).toSet
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/EvaluatePythonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/EvaluatePythonSuite.scala
new file mode 100644
index 0000000000000..ec26f1b2a865f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/EvaluatePythonSuite.scala
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.python
+
+import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException, SparkRuntimeException}
+import org.apache.spark.sql.catalyst.util.STUtils
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.BinaryView
+
+class EvaluatePythonSuite extends SparkFunSuite {
+
+  // POINT(1 2) in WKB, little-endian.
+  private val pointWkb: Array[Byte] = "010100000000000000000031400000000000001C40"
+    .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+
+  private def pyGeo(srid: Int, wkb: Array[Byte]): java.util.HashMap[String, Any] = {
+    val m = new java.util.HashMap[String, Any]()
+    m.put("srid", srid)
+    m.put("wkb", wkb)
+    m
+  }
+
+  // ----- GeographyType -----
+
+  test("makeFromJava(GeographyType): preserves per-row SRID for fixed-SRID columns") {
+    // Geography supports a variety of geographic SRIDs beyond the default 4326. Ensure that the
+    // SRID is preserved on the Python -> Catalyst conversion path.
+    Seq(4267, 4269, 4326, 4612, 37001, 104030).foreach { srid =>
+      val convert = EvaluatePython.makeFromJava(GeographyType(srid))
+      val result = convert(pyGeo(srid, pointWkb))
+      assert(result.isInstanceOf[BinaryView])
+      assert(STUtils.stGeogSrid(result.asInstanceOf[BinaryView]) === srid)
+    }
+  }
+
+  test("makeFromJava(GeographyType ANY): preserves per-row SRID for mixed-SRID columns") {
+    val convert = EvaluatePython.makeFromJava(GeographyType("ANY"))
+    Seq(4267, 4269, 4326).foreach { srid =>
+      val result = convert(pyGeo(srid, pointWkb))
+      assert(result.isInstanceOf[BinaryView])
+      assert(STUtils.stGeogSrid(result.asInstanceOf[BinaryView]) === srid)
+    }
+  }
+
+  test("makeFromJava(GeographyType): rejects SRID mismatch on a fixed-SRID column") {
+    val convert = EvaluatePython.makeFromJava(GeographyType(4326))
+    checkError(
+      exception = intercept[SparkRuntimeException] {
+        convert(pyGeo(4267, pointWkb))
+      },
+      condition = "GEO_ENCODER_SRID_MISMATCH_ERROR",
+      parameters = Map("type" -> "GEOGRAPHY", "valueSrid" -> "4267", "typeSrid" -> "4326"))
+  }
+
+  test("makeFromJava(GeographyType ANY): rejects non-geographic SRID") {
+    val convert = EvaluatePython.makeFromJava(GeographyType("ANY"))
+    // SRID 0 is not a geographic SRID; even mixed-SRID columns must reject it.
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        convert(pyGeo(0, pointWkb))
+      },
+      condition = "ST_INVALID_SRID_VALUE",
+      parameters = Map("srid" -> "0"))
+    // SRID 3857 is a valid Cartesian SRID but not geographic.
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        convert(pyGeo(3857, pointWkb))
+      },
+      condition = "ST_INVALID_SRID_VALUE",
+      parameters = Map("srid" -> "3857"))
+  }
+
+  test("makeFromJava(GeographyType): null is preserved") {
+    val convert = EvaluatePython.makeFromJava(GeographyType(4326))
+    assert(convert(null) === null)
+  }
+
+  // ----- GeometryType -----
+
+  test("makeFromJava(GeometryType): preserves per-row SRID for fixed-SRID columns") {
+    // Geometry supports both the default SRID 0 and a variety of Cartesian/geographic SRIDs.
+    Seq(0, 3857, 4267, 4269, 4326, 32601, 102964).foreach { srid =>
+      val convert = EvaluatePython.makeFromJava(GeometryType(srid))
+      val result = convert(pyGeo(srid, pointWkb))
+      assert(result.isInstanceOf[BinaryView])
+      assert(STUtils.stGeomSrid(result.asInstanceOf[BinaryView]) === srid)
+    }
+  }
+
+  test("makeFromJava(GeometryType ANY): preserves per-row SRID for mixed-SRID columns") {
+    val convert = EvaluatePython.makeFromJava(GeometryType("ANY"))
+    Seq(0, 3857, 4267, 4269, 4326).foreach { srid =>
+      val result = convert(pyGeo(srid, pointWkb))
+      assert(result.isInstanceOf[BinaryView])
+      assert(STUtils.stGeomSrid(result.asInstanceOf[BinaryView]) === srid)
+    }
+  }
+
+  test("makeFromJava(GeometryType): rejects SRID mismatch on a fixed-SRID column") {
+    val convert = EvaluatePython.makeFromJava(GeometryType(0))
+    checkError(
+      exception = intercept[SparkRuntimeException] {
+        convert(pyGeo(4326, pointWkb))
+      },
+      condition = "GEO_ENCODER_SRID_MISMATCH_ERROR",
+      parameters = Map("type" -> "GEOMETRY", "valueSrid" -> "4326", "typeSrid" -> "0"))
+  }
+
+  test("makeFromJava(GeometryType ANY): rejects unsupported SRID") {
+    val convert = EvaluatePython.makeFromJava(GeometryType("ANY"))
+    // SRID 1 is not a registered SRID, so even mixed-SRID columns must reject it.
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        convert(pyGeo(1, pointWkb))
+      },
+      condition = "ST_INVALID_SRID_VALUE",
+      parameters = Map("srid" -> "1"))
+  }
+
+  test("makeFromJava(GeometryType): null is preserved") {
+    val convert = EvaluatePython.makeFromJava(GeometryType(0))
+    assert(convert(null) === null)
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala
index 03e23e6b466ca..19855f1724b9f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala
@@ -417,11 +417,20 @@ class RocksDBStateStoreIntegrationSuite extends StreamTest
           inputData.addData(1, 2, 3, 4)
           query.processAllAvailable()
 
-          // With 2 partitions and bounded memory enabled, we should have
-          // 2 bounded memory providers registered and no unbounded ones
+          // With 2 partitions and bounded memory enabled, we should have 2 bounded memory
+          // providers registered for this query.
+          //
+          // We intentionally do NOT assert getNumRocksDBInstances(false) == 0 here.
+          // RocksDBMemoryManager is a global singleton whose instanceMemoryMap is shared across
+          // all tests in the JVM. A previous test running in unbounded mode can leave a straggler
+          // updateMemoryUsage(..., isBoundedMemory = false) call in flight that lands in the map
+          // after this test's resetWriteBufferManagerAndCache and is never followed by an
+          // unregisterInstance (the store has already closed). That stray unbounded entry then
+          // persists for the rest of the JVM, so an unbounded count of 0 cannot be guaranteed and
+          // eventually{} cannot drain it (this was the SPARK-55993 flake: "1 did not equal 0").
+          // Counting this query's bounded instances is the stable, meaningful signal.
           eventually(timeout(Span(10, Seconds)), interval(Span(500, Millis))) {
             assert(RocksDBMemoryManager.getNumRocksDBInstances(true) == 2)
-            assert(RocksDBMemoryManager.getNumRocksDBInstances(false) == 0)
           }
 
           // Add more data and check providers remain registered
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
index 87560d1749562..dc697f5b99dc5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
@@ -3276,6 +3276,49 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession
     }
   }
 
+  Seq(true, false).foreach { boundedMemoryUsage =>
+    testWithColumnFamilies(
+      s"SPARK-57183: LRUCache is handled correctly on RocksDB.close() " +
+        s"with boundedMemoryUsage=$boundedMemoryUsage",
+      TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled =>
+      withTempDir { dir =>
+        try {
+          val sqlConf = new SQLConf
+          sqlConf.setConfString(
+            RocksDBConf.ROCKSDB_SQL_CONF_NAME_PREFIX + "." +
+              RocksDBConf.BOUNDED_MEMORY_USAGE_CONF_KEY, boundedMemoryUsage.toString)
+          if (boundedMemoryUsage) {
+            sqlConf.setConfString(
+              RocksDBConf.ROCKSDB_SQL_CONF_NAME_PREFIX + "." +
+                RocksDBConf.MAX_MEMORY_USAGE_MB_CONF_KEY, "128")
+          }
+          val dbConf = RocksDBConf(StateStoreConf(sqlConf))
+
+          val (_, cache) = withDB(dir.getCanonicalPath, conf = dbConf,
+            useColumnFamilies = colFamiliesEnabled) { db =>
+            db.load(0)
+            db.put("k", "v")
+            db.commit()
+            db.getWriteBufferManagerAndCache()
+          }
+          if (boundedMemoryUsage) {
+            // Shared singleton -- must remain open after a single instance closes
+            assert(cache.isOwningHandle,
+              "Shared LRUCache handle must not be released after a single RocksDB.close() " +
+                "in bounded mode")
+          } else {
+            // Per-instance cache -- must be released deterministically on close()
+            assert(!cache.isOwningHandle,
+              "LRUCache native handle should be released after RocksDB.close() " +
+                "in unbounded mode")
+          }
+        } finally {
+          RocksDBMemoryManager.resetWriteBufferManagerAndCache
+        }
+      }
+    }
+  }
+
   Seq("100", "1000", "100000").foreach { totalMemorySizeMB =>
     testWithColumnFamilies(s"Memory mgmt - valid config " +
       s"with totalMemorySizeMB=$totalMemorySizeMB",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala
index 2bc4e53611ba4..6359688384065 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala
@@ -66,6 +66,7 @@ abstract class AllExecutionsPageSuite extends SharedSparkSession with BeforeAndA
     val page = new AllExecutionsPage(tab)
     val html = page.render(request).toString().toLowerCase(Locale.ROOT)
     assert(html.contains("sql-executions-table"))
+    assert(html.contains("sql-table-utils.js"))
     assert(html.contains("allexecutionspage.js"))
     assert(html.contains("datatables"))
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorUtilsSuite.scala
index 6205484d6be70..b1c0d6c1d7d51 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorUtilsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorUtilsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.vectorized
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 import org.apache.spark.unsafe.types.UTF8String
@@ -134,30 +135,108 @@ class ColumnVectorUtilsSuite extends SparkFunSuite {
     }
   }
 
-  testConstantColumnVector("not supported: fill map", 10,
+  testConstantColumnVector("fill array of ints", 10, ArrayType(IntegerType)) { vector =>
+    val arr = new GenericArrayData(Array[Any](1, 2, 3, 4, 5))
+    ColumnVectorUtils.populate(vector, InternalRow(arr), 0)
+    (0 until 10).foreach { i =>
+      assert(vector.getArray(i).toIntArray === Array(1, 2, 3, 4, 5))
+    }
+  }
+
+  testConstantColumnVector("fill array of strings", 10, ArrayType(StringType)) { vector =>
+    val arr = new GenericArrayData(Array[Any](
+      UTF8String.fromString("a"),
+      UTF8String.fromString("bb"),
+      UTF8String.fromString("ccc")))
+    ColumnVectorUtils.populate(vector, InternalRow(arr), 0)
+    (0 until 10).foreach { i =>
+      val a = vector.getArray(i)
+      assert(a.numElements() == 3)
+      assert(a.getUTF8String(0) == UTF8String.fromString("a"))
+      assert(a.getUTF8String(1) == UTF8String.fromString("bb"))
+      assert(a.getUTF8String(2) == UTF8String.fromString("ccc"))
+    }
+  }
+
+  testConstantColumnVector("fill map of int -> boolean", 10,
     MapType(IntegerType, BooleanType)) { vector =>
-    val message = intercept[RuntimeException] {
-      ColumnVectorUtils.populate(vector, InternalRow("fakeMap"), 0)
-    }.getMessage
-    assert(message == "DataType MAP<INT, BOOLEAN> is not supported in column vectorized reader.")
+    val keys = new GenericArrayData(Array[Any](1, 2, 3))
+    val values = new GenericArrayData(Array[Any](true, false, true))
+    val map = new ArrayBasedMapData(keys, values)
+    ColumnVectorUtils.populate(vector, InternalRow(map), 0)
+    (0 until 10).foreach { i =>
+      val m = vector.getMap(i)
+      assert(m.numElements() == 3)
+      assert(m.keyArray().toIntArray === Array(1, 2, 3))
+      assert(m.valueArray().toBooleanArray === Array(true, false, true))
+    }
   }
 
-  testConstantColumnVector("not supported: fill struct", 10,
+  testConstantColumnVector("fill struct", 10,
     new StructType()
       .add(StructField("name", StringType))
       .add(StructField("age", IntegerType))) { vector =>
-    val message = intercept[RuntimeException] {
-      ColumnVectorUtils.populate(vector, InternalRow("fakeStruct"), 0)
-    }.getMessage
-    assert(message ==
-      "DataType STRUCT<name: STRING, age: INT> is not supported in column vectorized reader.")
-  }
-
-  testConstantColumnVector("not supported: fill array", 10,
-    ArrayType(IntegerType)) { vector =>
-    val message = intercept[RuntimeException] {
-      ColumnVectorUtils.populate(vector, InternalRow("fakeArray"), 0)
-    }.getMessage
-    assert(message == "DataType ARRAY<INT> is not supported in column vectorized reader.")
+    val row = InternalRow(UTF8String.fromString("jack"), 27)
+    ColumnVectorUtils.populate(vector, InternalRow(row), 0)
+    (0 until 10).foreach { i =>
+      assert(vector.getChild(0).getUTF8String(i) == UTF8String.fromString("jack"))
+      assert(vector.getChild(1).getInt(i) == 27)
+    }
+  }
+
+  testConstantColumnVector("fill struct with null field", 10,
+    new StructType()
+      .add(StructField("name", StringType, nullable = true))
+      .add(StructField("age", IntegerType))) { vector =>
+    val row = InternalRow(null, 27)
+    ColumnVectorUtils.populate(vector, InternalRow(row), 0)
+    (0 until 10).foreach { i =>
+      assert(vector.getChild(0).isNullAt(i))
+      assert(vector.getChild(1).getInt(i) == 27)
+    }
+  }
+
+  testConstantColumnVector("fill nested struct", 10,
+    new StructType()
+      .add(StructField("inner",
+        new StructType()
+          .add(StructField("k", StringType))
+          .add(StructField("v", IntegerType))))
+      .add(StructField("flag", BooleanType))) { vector =>
+    val inner = InternalRow(UTF8String.fromString("a"), 1)
+    val outer = InternalRow(inner, true)
+    ColumnVectorUtils.populate(vector, InternalRow(outer), 0)
+    (0 until 10).foreach { i =>
+      val s = vector.getChild(0)
+      assert(s.getChild(0).getUTF8String(i) == UTF8String.fromString("a"))
+      assert(s.getChild(1).getInt(i) == 1)
+      assert(vector.getChild(1).getBoolean(i))
+    }
+  }
+
+  testConstantColumnVector("fill nested array<struct>", 10,
+    ArrayType(new StructType()
+      .add(StructField("k", StringType))
+      .add(StructField("v", IntegerType)))) { vector =>
+    val structs = new GenericArrayData(Array[Any](
+      InternalRow(UTF8String.fromString("a"), 1),
+      InternalRow(UTF8String.fromString("bb"), 2),
+      InternalRow(null, 3)))
+    ColumnVectorUtils.populate(vector, InternalRow(structs), 0)
+    (0 until 10).foreach { i =>
+      val a = vector.getArray(i)
+      assert(a.numElements() == 3)
+      assert(a.getStruct(0, 2).getUTF8String(0) == UTF8String.fromString("a"))
+      assert(a.getStruct(0, 2).getInt(1) == 1)
+      assert(a.getStruct(1, 2).getUTF8String(0) == UTF8String.fromString("bb"))
+      assert(a.getStruct(1, 2).getInt(1) == 2)
+      assert(a.getStruct(2, 2).isNullAt(0))
+      assert(a.getStruct(2, 2).getInt(1) == 3)
+    }
+  }
+
+  testConstantColumnVector("fill null array", 10, ArrayType(IntegerType)) { vector =>
+    ColumnVectorUtils.populate(vector, InternalRow(null), 0)
+    assert(vector.hasNull)
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
index 0f2ca93f287c7..40f73450eb21d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
@@ -48,6 +48,38 @@ import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String, VariantVal}
 import org.apache.spark.util.ArrayImplicits._
 
+/**
+ * A minimal UDT backed by IntegerType, used by SPARK-55897 tests.
+ */
+@SQLUserDefinedType(udt = classOf[TestIntUDT])
+private case class TestIntWrapper(value: Int)
+
+private class TestIntUDT extends UserDefinedType[TestIntWrapper] {
+  override def sqlType: DataType = IntegerType
+  override def serialize(obj: TestIntWrapper): Any = obj.value
+  override def userClass: Class[TestIntWrapper] = classOf[TestIntWrapper]
+  override def deserialize(datum: Any): TestIntWrapper = datum match {
+    case v: Int => TestIntWrapper(v)
+  }
+}
+
+/**
+ * A minimal UDT backed by StructType, used by SPARK-55897 tests.
+ */
+@SQLUserDefinedType(udt = classOf[TestStructWrapperUDT])
+private case class TestStructWrapper(x: Int, y: Long)
+
+private class TestStructWrapperUDT extends UserDefinedType[TestStructWrapper] {
+  override def sqlType: DataType = new StructType()
+    .add("x", IntegerType)
+    .add("y", LongType)
+  override def serialize(obj: TestStructWrapper): Any = InternalRow(obj.x, obj.y)
+  override def userClass: Class[TestStructWrapper] = classOf[TestStructWrapper]
+  override def deserialize(datum: Any): TestStructWrapper = datum match {
+    case row: InternalRow => TestStructWrapper(row.getInt(0), row.getLong(1))
+  }
+}
+
 @ExtendedSQLTest
 class ColumnarBatchSuite extends SparkFunSuite {
 
@@ -2071,4 +2103,93 @@ class ColumnarBatchSuite extends SparkFunSuite {
         }
       }
   }
+
+  testVector(
+    "SPARK-55897: ColumnarRow.get with primitive-backed UDT",
+    10,
+    new StructType().add("name", StringType).add("udt_field", IntegerType)) { column =>
+      column.getChild(0).putByteArray(0, "hello".getBytes)
+      column.getChild(1).putInt(0, 42)
+
+      val row = column.getStruct(0)
+      assert(row.get(1, new TestIntUDT()) === 42)
+  }
+
+  testVector(
+    "SPARK-55897: ColumnarRow.get with struct-backed UDT",
+    10,
+    new StructType()
+      .add("id", IntegerType)
+      .add("nested", new StructType().add("x", IntegerType).add("y", LongType))) { column =>
+      column.getChild(0).putInt(0, 1)
+      column.getChild(1).getChild(0).putInt(0, 10)
+      column.getChild(1).getChild(1).putLong(0, 20L)
+
+      val row = column.getStruct(0)
+      val nested = row.get(1, new TestStructWrapperUDT()).asInstanceOf[InternalRow]
+      assert(nested.getInt(0) === 10)
+      assert(nested.getLong(1) === 20L)
+  }
+
+  testVector(
+    "SPARK-55897: ColumnarArray.get with primitive-backed UDT",
+    10,
+    new ArrayType(IntegerType, false)) { column =>
+      val data = column.arrayData()
+      data.putInt(0, 10)
+      data.putInt(1, 20)
+      column.putArray(0, 0, 2)
+
+      val arr = column.getArray(0)
+      assert(arr.get(0, new TestIntUDT()) === 10)
+      assert(arr.get(1, new TestIntUDT()) === 20)
+  }
+
+  testVector(
+    "SPARK-55897: ColumnarArray.get with struct-backed UDT",
+    10,
+    new ArrayType(new StructType().add("x", IntegerType).add("y", LongType), false)) { column =>
+      val data = column.arrayData()
+      data.getChild(0).putInt(0, 100)
+      data.getChild(1).putLong(0, 200L)
+      column.putArray(0, 0, 1)
+
+      val arr = column.getArray(0)
+      val row = arr.get(0, new TestStructWrapperUDT()).asInstanceOf[InternalRow]
+      assert(row.getInt(0) === 100)
+      assert(row.getLong(1) === 200L)
+  }
+
+  test("SPARK-55897: ColumnarBatchRow.get with primitive-backed UDT") {
+    Seq(MemoryMode.ON_HEAP, MemoryMode.OFF_HEAP).foreach { memMode =>
+      val col = allocate(10, IntegerType, memMode)
+      try {
+        col.putInt(0, 99)
+        val batchRow = new ColumnarBatchRow(Array(col))
+        batchRow.rowId = 0
+        assert(batchRow.get(0, new TestIntUDT()) === 99)
+      } finally {
+        col.close()
+      }
+    }
+  }
+
+  test("SPARK-55897: ColumnarBatchRow.get with struct-backed UDT") {
+    Seq(MemoryMode.ON_HEAP, MemoryMode.OFF_HEAP).foreach { memMode =>
+      val col = allocate(10,
+        new StructType().add("x", IntegerType).add("y", LongType), memMode)
+      try {
+        col.getChild(0).putInt(0, 5)
+        col.getChild(1).putLong(0, 15L)
+        val batchRow = new ColumnarBatchRow(Array(col))
+        batchRow.rowId = 0
+
+        val row = batchRow.get(0, new TestStructWrapperUDT()).asInstanceOf[InternalRow]
+        assert(row.getInt(0) === 5)
+        assert(row.getLong(1) === 15L)
+      } finally {
+        col.close()
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowFunctionSuite.scala
new file mode 100644
index 0000000000000..d6d18cff4df5b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowFunctionSuite.scala
@@ -0,0 +1,896 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.apache.spark.sql.{DataFrame, Encoder, Encoders, QueryTest, Row}
+import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, Window}
+import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{DataType, LongType, StructType}
+import org.apache.spark.util.SparkErrorUtils
+
+/**
+ * End-to-end tests for the block-chunked segment-tree moving window frame.
+ * Covers basic aggregates, frame boundaries, min-rows fallback, NULL/NaN,
+ * numeric/string/date-timestamp types, RANGE, Decimal/Binary merge, UDAF
+ * fallback, and frame lifecycle.
+ */
+class SegmentTreeWindowFunctionSuite extends QueryTest with SharedSparkSession {
+
+  import testImplicits._
+
+  // Force seg-tree path regardless of partition size (fallback exercised explicitly).
+  private val enableSegTree: Map[String, String] = Map(
+    SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+    SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "1")
+
+  private val disableSegTree: Map[String, String] = Map(
+    SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "false")
+
+  /** Build `f(conf)` twice (enabled / disabled) and assert equal results. */
+  private def checkEquivalence(build: () => DataFrame): Unit = {
+    val baseline: Seq[Row] = withSQLConf(disableSegTree.toSeq: _*) {
+      build().collect().toSeq
+    }
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val actual = build().collect().toSeq
+      // Use QueryTest.sameRows (which normalizes Array[_] via prepareRow before
+      // sorting) instead of sortBy(_.toString): Row.toString on Array[Byte]
+      // and similar ref-typed values is address-based and can reorder baseline
+      // vs actual differently even when the multiset of rows is identical.
+      QueryTest.sameRows(baseline, actual, isSorted = false).foreach { err =>
+        fail(s"segment-tree output differs from baseline.\n$err")
+      }
+    }
+  }
+
+  /** Standard fixture: 3 partitions, sizes 40/40/40, values = row index. */
+  private def baseDF: DataFrame = {
+    spark.range(0, 120).selectExpr(
+      "id",
+      "(id % 3) AS pk",
+      "CAST(id AS INT) AS v")
+  }
+
+  private def winSpec(lo: Int, hi: Int) =
+    Window.partitionBy($"pk").orderBy($"id").rowsBetween(lo, hi)
+
+  // A1: basic aggregate equivalence
+
+  test("MIN over ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", min($"v").over(winSpec(-3, 3)).as("agg")))
+  }
+
+  test("MAX over ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", max($"v").over(winSpec(-3, 3)).as("agg")))
+  }
+
+  test("SUM over ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", sum($"v").over(winSpec(-3, 3)).as("agg")))
+  }
+
+  test("COUNT over ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", count($"v").over(winSpec(-3, 3)).as("agg")))
+  }
+
+  test("AVG over ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", avg($"v").over(winSpec(-3, 3)).as("agg")))
+  }
+
+  test("MIN + MAX + SUM share a single window frame") {
+    checkEquivalence(() =>
+      baseDF.select(
+        $"id",
+        $"pk",
+        min($"v").over(winSpec(-3, 3)).as("mn"),
+        max($"v").over(winSpec(-3, 3)).as("mx"),
+        sum($"v").over(winSpec(-3, 3)).as("sm")))
+  }
+
+  // A2: frame-size boundaries
+
+  test("frame size = 1 (CURRENT ROW only)") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", sum($"v").over(winSpec(0, 0)).as("agg")))
+  }
+
+  test("frame spans full partition") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk", sum($"v").over(winSpec(-100, 100)).as("agg")))
+  }
+
+  test("frame extends past both partition edges") {
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk",
+        sum($"v").over(winSpec(-50, 50)).as("agg"),
+        min($"v").over(winSpec(-50, 50)).as("mn"),
+        max($"v").over(winSpec(-50, 50)).as("mx")))
+  }
+
+
+  test("partition below minPartitionRows falls back to SlidingWindowFunctionFrame") {
+    // 5-row partition, min threshold = 10 -> must fall back.
+    val df = spark.range(0, 5).selectExpr(
+      "id", "0 AS pk", "CAST(id AS INT) AS v")
+    val enabledConf = Map(
+      SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+      SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "10")
+
+    val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+      df.select($"id", sum($"v").over(winSpec(-1, 1)).as("s"))
+        .collect().sortBy(_.toString)
+    }
+    val actual = withSQLConf(enabledConf.toSeq: _*) {
+      df.select($"id", sum($"v").over(winSpec(-1, 1)).as("s"))
+        .collect().sortBy(_.toString)
+    }
+    assert(actual.toSeq === baseline.toSeq)
+
+    // Confirm the fallback flag actually flipped.
+    withSQLConf(enabledConf.toSeq: _*) {
+      SegmentTreeWindowTestHelper.withSmallPartitionFrame(
+        SQLConf.get, rows = 5) { frame =>
+        assert(frame.fallbackUsed,
+          "expected fallbackUsed=true for partition smaller than minPartitionRows")
+      }
+    }
+  }
+
+
+  test("NTH_VALUE over ROWS frame falls back cleanly (no mergeExpressions crash)") {
+    // NthValue extends DeclarativeAggregate but its mergeExpressions throws
+    // mergeUnsupportedByWindowFunctionError. eligibleForSegTree must exclude it.
+    val df = baseDF
+    val withSegTree = withSQLConf(enableSegTree.toSeq: _*) {
+      df.selectExpr(
+        "id", "pk",
+        "nth_value(v, 3) OVER (PARTITION BY pk ORDER BY id " +
+          "ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING) AS n3")
+        .collect().sortBy(_.toString)
+    }
+    val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+      df.selectExpr(
+        "id", "pk",
+        "nth_value(v, 3) OVER (PARTITION BY pk ORDER BY id " +
+          "ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING) AS n3")
+        .collect().sortBy(_.toString)
+    }
+    assert(withSegTree.toSeq === baseline.toSeq)
+  }
+
+  test("ROW_NUMBER over ROWS frame falls back cleanly (no mergeExpressions crash)") {
+    val df = baseDF
+    val withSegTree = withSQLConf(enableSegTree.toSeq: _*) {
+      df.selectExpr(
+        "id", "pk",
+        "row_number() OVER (PARTITION BY pk ORDER BY id) AS rn")
+        .collect().sortBy(_.toString)
+    }
+    val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+      df.selectExpr(
+        "id", "pk",
+        "row_number() OVER (PARTITION BY pk ORDER BY id) AS rn")
+        .collect().sortBy(_.toString)
+    }
+    assert(withSegTree.toSeq === baseline.toSeq)
+  }
+
+  // A3: NULL/NaN/Infinity; A4: numeric/string/date-timestamp types.
+  // Unsupported-merge / DISTINCT / feature-flag fallback.
+  // Oracle: run with seg-tree enabled and disabled, assert equal Row sequences.
+
+  // A3: NULL / special values
+
+  test("all-NULL column: MIN/MAX/SUM/AVG/COUNT") {
+    val df = spark.range(0, 30).selectExpr(
+      "id", "(id % 3) AS pk", "CAST(NULL AS INT) AS v")
+    checkEquivalence(() =>
+      df.select($"id", $"pk",
+        min($"v").over(winSpec(-3, 3)).as("mn"),
+        max($"v").over(winSpec(-3, 3)).as("mx"),
+        sum($"v").over(winSpec(-3, 3)).as("sm"),
+        avg($"v").over(winSpec(-3, 3)).as("av"),
+        count($"v").over(winSpec(-3, 3)).as("cn")))
+  }
+
+  test("mixed NULL and non-NULL: NULLs must not leak into MIN/MAX") {
+    // Every 3rd value is NULL. Aggregates must skip them (NULL-agnostic merge).
+    val df = spark.range(0, 60).selectExpr(
+      "id",
+      "(id % 3) AS pk",
+      "CASE WHEN id % 3 = 0 THEN NULL ELSE CAST(id AS INT) END AS v")
+    checkEquivalence(() =>
+      df.select($"id", $"pk",
+        min($"v").over(winSpec(-4, 4)).as("mn"),
+        max($"v").over(winSpec(-4, 4)).as("mx"),
+        sum($"v").over(winSpec(-4, 4)).as("sm"),
+        count($"v").over(winSpec(-4, 4)).as("cn")))
+  }
+
+  test("Double NaN and +/-Infinity propagate correctly through MIN/MAX/SUM") {
+    // Trap: NaN > +Inf in Spark's MIN/MAX ordering; +Inf + -Inf = NaN in SUM.
+    // Seg-tree uses DeclarativeAggregate.merge; behavior must match baseline.
+    val df = spark.range(0, 30).selectExpr(
+      "id",
+      "(id % 2) AS pk",
+      """CASE
+           WHEN id % 7 = 0 THEN double('NaN')
+           WHEN id % 7 = 1 THEN double('Infinity')
+           WHEN id % 7 = 2 THEN double('-Infinity')
+           ELSE CAST(id AS DOUBLE)
+         END AS v""")
+    checkEquivalence(() =>
+      df.select($"id", $"pk",
+        min($"v").over(winSpec(-3, 3)).as("mn"),
+        max($"v").over(winSpec(-3, 3)).as("mx"),
+        sum($"v").over(winSpec(-3, 3)).as("sm")))
+  }
+
+  // A4: data types
+
+  test("numeric types: Int / Long / Double / Decimal") {
+    val df = spark.range(0, 60).selectExpr(
+      "id",
+      "(id % 3) AS pk",
+      "CAST(id AS INT)             AS vi",
+      "CAST(id * 1000000L AS LONG) AS vl",
+      "CAST(id AS DOUBLE) + 0.25   AS vd",
+      "CAST(id AS DECIMAL(20,4))   AS vdec")
+    checkEquivalence(() =>
+      df.select($"id", $"pk",
+        sum($"vi").over(winSpec(-2, 2)).as("si"),
+        min($"vl").over(winSpec(-2, 2)).as("ml"),
+        max($"vd").over(winSpec(-2, 2)).as("xd"),
+        sum($"vdec").over(winSpec(-2, 2)).as("sdec"),
+        avg($"vdec").over(winSpec(-2, 2)).as("adec")))
+  }
+
+  test("String lexicographic MIN/MAX") {
+    // Non-monotone values so MIN/MAX exercise the seg-tree merge (not edge).
+    val df = spark.range(0, 40).selectExpr(
+      "id",
+      "(id % 2) AS pk",
+      "CONCAT('s', LPAD(CAST((id * 37) % 97 AS STRING), 3, '0')) AS v")
+    checkEquivalence(() =>
+      df.select($"id", $"pk",
+        min($"v").over(winSpec(-3, 3)).as("mn"),
+        max($"v").over(winSpec(-3, 3)).as("mx")))
+  }
+
+  test("Date / Timestamp MIN/MAX") {
+    val df = spark.range(0, 40).selectExpr(
+      "id",
+      "(id % 2) AS pk",
+      "date_add(DATE'2020-01-01', CAST((id * 13) % 365 AS INT)) AS vd",
+      "CAST(TIMESTAMP'2020-01-01 00:00:00' + " +
+        "make_interval(0, 0, 0, 0, 0, 0, CAST(id AS DECIMAL(18,6))) AS TIMESTAMP) AS vt")
+    checkEquivalence(() =>
+      df.select($"id", $"pk",
+        min($"vd").over(winSpec(-3, 3)).as("mnd"),
+        max($"vd").over(winSpec(-3, 3)).as("mxd"),
+        min($"vt").over(winSpec(-3, 3)).as("mnt"),
+        max($"vt").over(winSpec(-3, 3)).as("mxt")))
+  }
+
+
+  test("collect_list falls back cleanly (non-DeclarativeAggregate)") {
+    // collect_list is TypedImperativeAggregate; eligibleForSegTree must decline.
+    checkEquivalence(() =>
+      baseDF.select($"id", $"pk",
+        collect_list($"v").over(winSpec(-2, 2)).as("lst")))
+  }
+
+  test("DISTINCT window aggregate is rejected by analyzer regardless of seg-tree flag") {
+    // Analyzer throws DISTINCT_WINDOW_FUNCTION_UNSUPPORTED before frame
+    // construction; seg-tree flag must not alter this behavior.
+    def run(): Unit = {
+      baseDF.select($"id", $"pk",
+        count_distinct($"v").over(winSpec(-3, 3)).as("cd")).collect()
+    }
+    withSQLConf(disableSegTree.toSeq: _*) {
+      val e = intercept[org.apache.spark.sql.AnalysisException](run())
+      assert(e.getMessage.contains("DISTINCT_WINDOW_FUNCTION_UNSUPPORTED"))
+    }
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val e = intercept[org.apache.spark.sql.AnalysisException](run())
+      assert(e.getMessage.contains("DISTINCT_WINDOW_FUNCTION_UNSUPPORTED"))
+    }
+  }
+
+  test("feature flag off: segmentTree.enabled=false yields baseline semantics") {
+    // Sanity: disabling the flag on a seg-tree-eligible workload still
+    // produces the SlidingWindowFunctionFrame answer.
+    val df = baseDF
+    val expected = withSQLConf(disableSegTree.toSeq: _*) {
+      df.select($"id", $"pk", min($"v").over(winSpec(-3, 3)).as("mn"))
+        .collect().sortBy(_.toString).toSeq
+    }
+    withSQLConf(
+      SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "false",
+      SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "1024") {
+      val actual = df.select($"id", $"pk", min($"v").over(winSpec(-3, 3)).as("mn"))
+        .collect().sortBy(_.toString).toSeq
+      assert(actual === expected)
+    }
+  }
+
+    // A5: RANGE frame equivalence (single-order-expr admission).
+  // MIN/MAX non-invertible, guaranteeing seg-tree path is exercised.
+
+  /** Run `sql` twice (flag off / on) and checkAnswer equality. */
+  private def checkRangeEquivalence(df: DataFrame, query: String): Unit = {
+    df.createOrReplaceTempView("t")
+    try {
+      val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+        spark.sql(query).collect().sortBy(_.toString)
+      }
+      withSQLConf(enableSegTree.toSeq: _*) {
+        val actual = spark.sql(query).collect().sortBy(_.toString)
+        assert(actual.toSeq === baseline.toSeq,
+          s"segment-tree output differs from baseline.\nExpected: ${baseline.toSeq}\n" +
+            s"Actual:   ${actual.toSeq}")
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("-- RANGE INT offset basic (non-uniform gaps, MIN/MAX)") {
+    // Non-uniform gaps so admit/drop loops must consult the order-key comparator.
+    val df = spark.range(0, 40).selectExpr(
+      "CAST(id AS INT) AS id",
+      "(CAST(id AS INT) % 2) AS pk",
+      "CAST(CASE CAST(id AS INT) % 7 " +
+        "WHEN 0 THEN 1 WHEN 1 THEN 3 WHEN 2 THEN 4 WHEN 3 THEN 4 " +
+        "WHEN 4 THEN 7 WHEN 5 THEN 10 ELSE 15 END + (CAST(id AS INT) / 7) * 20 AS INT) AS k",
+      "CAST((id * 31) % 97 AS INT) AS v")
+    checkRangeEquivalence(df,
+      """SELECT id, pk,
+        |  MIN(v) OVER (PARTITION BY pk ORDER BY k
+        |    RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS mn,
+        |  MAX(v) OVER (PARTITION BY pk ORDER BY k
+        |    RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS mx
+        |FROM t""".stripMargin)
+  }
+
+  test("-- RANGE Timestamp with INTERVAL offset (MAX)") {
+    // Irregular gaps force the timestamp comparator at the 1-hour boundary.
+    val df = spark.range(0, 30).selectExpr(
+      "CAST(id AS INT) AS id",
+      "(CAST(id AS INT) % 2) AS pk",
+      "CAST(TIMESTAMP'2024-01-01 10:00:00' + " +
+        "make_interval(0, 0, 0, 0, 0, 30 * CAST(id AS INT) * " +
+        "(CASE CAST(id AS INT) % 3 WHEN 0 THEN 1 WHEN 1 THEN 3 ELSE 4 END), 0) " +
+        "AS TIMESTAMP) AS ts",
+      "CAST((id * 17) % 53 AS INT) AS v")
+    checkRangeEquivalence(df,
+      """SELECT id, pk,
+        |  MAX(v) OVER (PARTITION BY pk ORDER BY ts
+        |    RANGE BETWEEN INTERVAL '1' HOUR PRECEDING
+        |              AND INTERVAL '1' HOUR FOLLOWING) AS mx
+        |FROM t""".stripMargin)
+  }
+
+  test("-- RANGE with tie (duplicate order keys) inclusion at boundary") {
+    // Trap: RANGE `0 PRECEDING AND 0 FOLLOWING` must include the FULL tie
+    // group at the current row's key, not just the current row. A ROWS-vs-
+    // RANGE confusion would return per-row MIN/MAX instead of group-level.
+    val rows = (0 until 40).map { i =>
+      val k = Seq(1, 2, 2, 2, 3, 4, 5)(i % 7)
+      (i, i % 2, k, (i * 13) % 41)
+    }
+    val df = rows.toDF("id", "pk", "k", "v")
+    checkRangeEquivalence(df,
+      """SELECT id, pk, k,
+        |  MIN(v) OVER (PARTITION BY pk ORDER BY k
+        |    RANGE BETWEEN 0 PRECEDING AND 0 FOLLOWING) AS mn,
+        |  MAX(v) OVER (PARTITION BY pk ORDER BY k
+        |    RANGE BETWEEN 0 PRECEDING AND 0 FOLLOWING) AS mx
+        |FROM t""".stripMargin)
+  }
+
+  test("-- RANGE frame wider than partition (C4: admit/drop loops no-op)") {
+    // Once the first batch is admitted, admit/drop must detect no change
+    // and skip work.
+    val df = spark.range(0, 25).selectExpr(
+      "CAST(id AS INT) AS id",
+      "(CAST(id AS INT) / 5) AS pk",
+      "CAST((id * 7) % 23 AS INT) AS k",
+      "CAST((id * 19) % 101 AS INT) AS v")
+    checkRangeEquivalence(df,
+      """SELECT id, pk,
+        |  MIN(v) OVER (PARTITION BY pk ORDER BY k
+        |    RANGE BETWEEN 100 PRECEDING AND 100 FOLLOWING) AS mn,
+        |  MAX(v) OVER (PARTITION BY pk ORDER BY k
+        |    RANGE BETWEEN 100 PRECEDING AND 100 FOLLOWING) AS mx
+        |FROM t""".stripMargin)
+  }
+
+  test("-- RANGE with NULL order key (NULLS FIRST / NULLS LAST)") {
+    // Trap: Spark groups all NULLs into a single equivalence class at head
+    // (NULLS FIRST) or tail (NULLS LAST); seg-tree must treat NULL as a
+    // tie group identical to the sliding baseline.
+    val rows = (0 until 36).map { i =>
+      val kOpt: Option[Int] = (i % 6) match {
+        case 0 | 1 | 5 => None
+        case 2 => Some(1)
+        case 3 => Some(2)
+        case _ => Some(3)
+      }
+      (i, i % 2, kOpt, (i * 11) % 37)
+    }
+    val df = rows.toDF("id", "pk", "k", "v")
+    checkRangeEquivalence(df,
+      """SELECT id, pk,
+        |  MIN(v) OVER (PARTITION BY pk ORDER BY k ASC NULLS FIRST
+        |    RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS mn_nf,
+        |  MAX(v) OVER (PARTITION BY pk ORDER BY k ASC NULLS FIRST
+        |    RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS mx_nf,
+        |  MIN(v) OVER (PARTITION BY pk ORDER BY k ASC NULLS LAST
+        |    RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS mn_nl,
+        |  MAX(v) OVER (PARTITION BY pk ORDER BY k ASC NULLS LAST
+        |    RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS mx_nl
+        |FROM t""".stripMargin)
+  }
+
+    // Decimal overflow / BinaryType MIN/MAX across block merge; UDAF fallback.
+  // Trap: blockSize=16 is SQLConf minimum; frame > blockSize ensures the
+  // seg-tree merge path is actually crossed.
+
+  private val segTreeBlock: String = "16"
+  private val segTreeFramePrec: Int = 17
+  private val segTreeRows: Int = 20
+
+  private def withSegTreeBlock(conf: (String, String)*)(body: => Unit): Unit = {
+    val extra = Seq(SQLConf.WINDOW_SEGMENT_TREE_BLOCK_SIZE.key -> segTreeBlock) ++ conf
+    withSQLConf(extra: _*)(body)
+  }
+
+
+  /**
+   * 20 rows in one partition, Decimal(38, 0) values near the type's upper
+   * bound; frame of `segTreeFramePrec` PRECEDING..CURRENT ROW makes any
+   * >=2-row window overflow Sum. Block 16 + frame 17 forces cross-block merge.
+   */
+  private def decimalOverflowDF: DataFrame = {
+    // 9e37 -- below Decimal(38,0) MAX (~9.99e37), but 2x overflows.
+    val big = "90000000000000000000000000000000000000"  // 38 digits
+    spark.range(0, segTreeRows.toLong).selectExpr(
+      "CAST(id AS INT) AS id",
+      "0 AS pk",
+      s"CAST('$big' AS DECIMAL(38, 0)) AS v")
+  }
+
+  private val decimalOverflowSql: String =
+    s"""SELECT id, pk,
+       |  SUM(v) OVER (PARTITION BY pk ORDER BY id
+       |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS s
+       |FROM t""".stripMargin
+
+  test("a -- Decimal overflow ANSI on, seg-tree matches sliding (both throw)") {
+    val df = decimalOverflowDF
+    df.createOrReplaceTempView("t")
+    try {
+      withSegTreeBlock(SQLConf.ANSI_ENABLED.key -> "true") {
+        withSQLConf(disableSegTree.toSeq: _*) {
+          val e = intercept[Exception] {
+            spark.sql(decimalOverflowSql).collect()
+          }
+          assert(hasArithmeticCause(e),
+            s"expected ArithmeticException root cause, got: ${e.getMessage}")
+        }
+        withSQLConf(enableSegTree.toSeq: _*) {
+          val e = intercept[Exception] {
+            spark.sql(decimalOverflowSql).collect()
+          }
+          assert(hasArithmeticCause(e),
+            s"expected ArithmeticException root cause, got: ${e.getMessage}")
+        }
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("b -- Decimal overflow ANSI off, seg-tree matches sliding (NULL on overflow)") {
+    val df = decimalOverflowDF
+    df.createOrReplaceTempView("t")
+    try {
+      withSegTreeBlock(SQLConf.ANSI_ENABLED.key -> "false") {
+        val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+          spark.sql(decimalOverflowSql).collect().sortBy(_.toString)
+        }
+        // At least one row must be NULL so we know overflow actually fired.
+        assert(baseline.exists(_.isNullAt(2)),
+          "baseline should contain NULL overflow rows; test data may be too small")
+        withSQLConf(enableSegTree.toSeq: _*) {
+          val actual = spark.sql(decimalOverflowSql).collect().sortBy(_.toString)
+          assert(actual.toSeq === baseline.toSeq)
+        }
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("c -- mid-window Decimal overflow slides past (seg-tree == sliding)") {
+    // Big values at ids 14..17 straddle block boundary at id=16, so any
+    // 4-row window overlapping >=2 of them overflows (-> NULL when ANSI off)
+    // and cross-block merge sees overflowing buffers. Rows past id=20 slide
+    // clear and recover non-NULL.
+    val big = "90000000000000000000000000000000000000"
+    val df = spark.range(0, 24).selectExpr(
+      "CAST(id AS INT) AS id",
+      "0 AS pk",
+      s"""CASE WHEN id IN (14, 15, 16, 17)
+              THEN CAST('$big' AS DECIMAL(38, 0))
+              ELSE CAST(id AS DECIMAL(38, 0))
+         END AS v""")
+    df.createOrReplaceTempView("t")
+    try {
+      val sqlStr =
+        """SELECT id, pk,
+          |  SUM(v) OVER (PARTITION BY pk ORDER BY id
+          |    ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS s
+          |FROM t""".stripMargin
+      withSegTreeBlock(SQLConf.ANSI_ENABLED.key -> "false") {
+        val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+          spark.sql(sqlStr).collect().sortBy(_.toString)
+        }
+        // Sanity: overflow fired AND later rows recover non-NULL.
+        assert(baseline.exists(_.isNullAt(2)),
+          "baseline should contain NULL overflow rows")
+        assert(baseline.exists(r => r.getInt(0) >= 21 && !r.isNullAt(2)),
+          "rows with id>=21 should be non-NULL (window slid past big values)")
+        withSQLConf(enableSegTree.toSeq: _*) {
+          val actual = spark.sql(sqlStr).collect().sortBy(_.toString)
+          assert(actual.toSeq === baseline.toSeq)
+        }
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  /** True iff the root cause of `t` is an [[ArithmeticException]] (ANSI overflow). */
+  private def hasArithmeticCause(t: Throwable): Boolean =
+    Option(SparkErrorUtils.getRootCause(t)).exists(_.isInstanceOf[ArithmeticException])
+
+
+  /** Pattern of 20 Array[Byte] values used across a. */
+  private def binaryVariedRows: Seq[(Int, Array[Byte])] = {
+    (0 until 20).map { i =>
+      val arr: Array[Byte] = (i % 8) match {
+        case 0 => Array[Byte](0x01, 0x02)
+        case 1 => Array[Byte](0x00)
+        case 2 => Array[Byte](0x7f)
+        case 3 => Array[Byte](0x7f, 0x00)
+        case 4 => Array[Byte](0x10, 0x20, 0x30)
+        case 5 => Array[Byte](0x10, 0x20)
+        case 6 => Array[Byte](0x10)
+        case _ => Array[Byte](0x05, 0x05, 0x05, 0x05)
+      }
+      (i, arr)
+    }
+  }
+
+  test("a -- BinaryType MIN/MAX cross-block merge") {
+    // Varied lengths/content; frame > blockSize guarantees merge path hit.
+    val df = binaryVariedRows.toDF("id", "v").selectExpr("id", "0 AS pk", "v")
+    df.createOrReplaceTempView("t")
+    try {
+      withSegTreeBlock() {
+        val sqlStr =
+          s"""SELECT id, pk,
+             |  MIN(v) OVER (PARTITION BY pk ORDER BY id
+             |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS mn,
+             |  MAX(v) OVER (PARTITION BY pk ORDER BY id
+             |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS mx
+             |FROM t""".stripMargin
+        val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+          spark.sql(sqlStr).collect().toSeq
+        }
+        withSQLConf(enableSegTree.toSeq: _*) {
+          val actual = spark.sql(sqlStr).collect().toSeq
+          QueryTest.sameRows(baseline, actual, isSorted = false).foreach { err =>
+            fail(s"seg-tree binary MIN/MAX differs.\n$err")
+          }
+        }
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("b -- BinaryType empty/NULL/single-zero distinction") {
+    // Trap: Spark treats empty-array and NULL as distinct; seg-tree must
+    // respect that. Pattern cycles across the block boundary.
+    val rows: Seq[(Int, Array[Byte])] = (0 until 20).map { i =>
+      val arr: Array[Byte] = (i % 4) match {
+        case 0 => Array[Byte](0x00)
+        case 1 => Array[Byte]()
+        case 2 => null
+        case _ => Array[Byte](0x01, 0x02)
+      }
+      (i, arr)
+    }
+    val df = rows.toDF("id", "v").selectExpr("id", "0 AS pk", "v")
+    df.createOrReplaceTempView("t")
+    try {
+      withSegTreeBlock() {
+        val sqlStr =
+          s"""SELECT id, pk,
+             |  MIN(v) OVER (PARTITION BY pk ORDER BY id
+             |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS mn,
+             |  MAX(v) OVER (PARTITION BY pk ORDER BY id
+             |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS mx
+             |FROM t""".stripMargin
+        val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+          spark.sql(sqlStr).collect().toSeq
+        }
+        withSQLConf(enableSegTree.toSeq: _*) {
+          val actual = spark.sql(sqlStr).collect().toSeq
+          QueryTest.sameRows(baseline, actual, isSorted = false).foreach { err =>
+            fail(s"seg-tree empty/NULL binary differs.\n$err")
+          }
+        }
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("c -- BinaryType unsigned lexicographic ordering") {
+    // Trap: Spark's BinaryType comparator is unsigned (0xFF > 0x01); a
+    // signed-byte comparator would get this backwards. Seg-tree must match.
+    val unsignedPattern: IndexedSeq[Array[Byte]] = IndexedSeq(
+      Array[Byte](0xff.toByte),
+      Array[Byte](0x01),
+      Array[Byte](0x80.toByte, 0x00),
+      Array[Byte](0x7f, 0xff.toByte),
+      Array[Byte](0xfe.toByte),
+      Array[Byte](0x00, 0xff.toByte),
+      Array[Byte](0x80.toByte),
+      Array[Byte](0x7f))
+    val rows: Seq[(Int, Array[Byte])] =
+      (0 until 20).map(i => (i, unsignedPattern(i % unsignedPattern.length)))
+    val df = rows.toDF("id", "v").selectExpr("id", "0 AS pk", "v")
+    df.createOrReplaceTempView("t")
+    try {
+      withSegTreeBlock() {
+        val sqlStr =
+          s"""SELECT id, pk,
+             |  MIN(v) OVER (PARTITION BY pk ORDER BY id
+             |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS mn,
+             |  MAX(v) OVER (PARTITION BY pk ORDER BY id
+             |    ROWS BETWEEN $segTreeFramePrec PRECEDING AND CURRENT ROW) AS mx
+             |FROM t""".stripMargin
+        val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+          spark.sql(sqlStr).collect().toSeq
+        }
+        withSQLConf(enableSegTree.toSeq: _*) {
+          val actual = spark.sql(sqlStr).collect().toSeq
+          QueryTest.sameRows(baseline, actual, isSorted = false).foreach { err =>
+            fail(s"seg-tree unsigned binary ordering differs.\n$err")
+          }
+        }
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+    // ScalaUDAF/ScalaAggregator both extend ImperativeAggregate and must be
+  // rejected by `eligibleForSegTree`. Flag ON must not throw (segtree merge on
+  // ImperativeAggregate would NPE) and must match flag OFF bit-for-bit.
+
+  test("a -- legacy ScalaUDAF falls back cleanly (no seg-tree merge)") {
+    val udaf = new LegacySumUdaf
+    spark.udf.register("seg_tree_legacy_sum", udaf)
+    val df = baseDF.selectExpr("id", "pk", "CAST(v AS LONG) AS v")
+    val query =
+      """SELECT id, pk,
+        |  seg_tree_legacy_sum(v) OVER (PARTITION BY pk ORDER BY id
+        |    ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS s
+        |FROM t""".stripMargin
+    df.createOrReplaceTempView("t")
+    try {
+      val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+        spark.sql(query).collect().sortBy(_.toString)
+      }
+      withSQLConf(enableSegTree.toSeq: _*) {
+        val actual = spark.sql(query).collect().sortBy(_.toString)
+        assert(actual.toSeq === baseline.toSeq,
+          s"ScalaUDAF fallback result differs.\nExpected: ${baseline.toSeq}\n" +
+            s"Actual:   ${actual.toSeq}")
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("b -- typed Aggregator falls back cleanly (no seg-tree merge)") {
+    val agg = udaf(new LongSumAggregator)
+    spark.udf.register("seg_tree_typed_sum", agg)
+    val df = baseDF.selectExpr("id", "pk", "CAST(v AS LONG) AS v")
+    val query =
+      """SELECT id, pk,
+        |  seg_tree_typed_sum(v) OVER (PARTITION BY pk ORDER BY id
+        |    ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS s
+        |FROM t""".stripMargin
+    df.createOrReplaceTempView("t")
+    try {
+      val baseline = withSQLConf(disableSegTree.toSeq: _*) {
+        spark.sql(query).collect().sortBy(_.toString)
+      }
+      withSQLConf(enableSegTree.toSeq: _*) {
+        val actual = spark.sql(query).collect().sortBy(_.toString)
+        assert(actual.toSeq === baseline.toSeq,
+          s"typed Aggregator fallback result differs.\nExpected: ${baseline.toSeq}\n" +
+            s"Actual:   ${actual.toSeq}")
+      }
+    } finally {
+      spark.catalog.dropTempView("t")
+    }
+  }
+
+  test("SPARK-56546: LAG does not eagerly construct AggregateProcessor under segtree") {
+    // Pre-fix, `val processor` eagerly invoked AggregateProcessor.apply on any
+  // non-empty `functions`, throwing INTERNAL_ERROR for lag(...) when routing
+  // hit FRAME_LESS_OFFSET. Frameless lag below is the minimal repro.
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = spark.range(10).select(
+        col("id"),
+        expr("lag(id, 1, id) OVER (ORDER BY id)").as("lag"))
+      val expected = (0L until 10L).map(i => Row(i, if (i == 0) 0L else i - 1))
+      checkAnswer(df, expected)
+    }
+  }
+
+  // Frame lifecycle
+  //   T1a: no fallback on pure-segtree partition.
+  //   T1b: fallback lazily allocated on small partition.
+  //   T1c: segtree->small reuses frame; fallback allocated once.
+  //   T1d: small->segtree drops retained fallback reference (GC-eligible).
+  //   T2 : throwing fallback.prepare must not flip `fallbackUsed` (frame is
+  //        `final` so we cannot inject a throwing fallback; we rely on the
+  //        structural witness that `fallbackUsed` is set ONLY after
+  //        `fallback.prepare` returns normally -- a regression would fail T1d).
+  //   T3 : close() after only small partition (tree never built) is a no-op.
+
+  test("lifecycle: no fallback allocated on segtree-only partition") {
+    val conf = new SQLConf
+    conf.setConfString(SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key, "1")
+    SegmentTreeWindowTestHelper.withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      val array = factory.newArray(rows = 10)
+      frame.prepare(array)
+      assert(!frame.fallbackUsed, "segtree path expected")
+      assert(!frame.fallbackAllocated,
+        "fallback must not be allocated on segtree-only partition")
+    }
+  }
+
+  test("lifecycle: fallback lazily allocated on small partition") {
+    val conf = new SQLConf
+    conf.setConfString(SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key, "100")
+    SegmentTreeWindowTestHelper.withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      assert(!frame.fallbackAllocated, "no allocation before prepare()")
+      frame.prepare(factory.newArray(rows = 5))
+      assert(frame.fallbackUsed, "fallback path expected")
+      assert(frame.fallbackAllocated, "fallback allocated after first small partition")
+    }
+  }
+
+  test("lifecycle: segtree then small partition reuses frame, allocates fallback once") {
+    val conf = new SQLConf
+    conf.setConfString(SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key, "10")
+    SegmentTreeWindowTestHelper.withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      // Partition 1: segtree path (20 rows >= 10).
+      frame.prepare(factory.newArray(rows = 20))
+      assert(!frame.fallbackUsed && !frame.fallbackAllocated)
+      // Partition 2: small partition.
+      frame.prepare(factory.newArray(rows = 5))
+      assert(frame.fallbackUsed, "fallback path expected on small partition")
+      assert(frame.fallbackAllocated, "fallback allocated on first small partition")
+    }
+  }
+
+  test("lifecycle: small then segtree transition drops fallback reference") {
+    val conf = new SQLConf
+    conf.setConfString(SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key, "10")
+    SegmentTreeWindowTestHelper.withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      // Partition 1: small -> fallback allocated.
+      frame.prepare(factory.newArray(rows = 5))
+      assert(frame.fallbackAllocated && frame.fallbackUsed)
+      // Partition 2: segtree -> fallback must be dropped (row-copy buffer GC-eligible).
+      frame.prepare(factory.newArray(rows = 20))
+      assert(!frame.fallbackUsed, "segtree path expected")
+      assert(!frame.fallbackAllocated,
+        "retained fallback reference must be dropped on segtree-path re-entry")
+    }
+  }
+
+  test("lifecycle: throwing fallback.prepare must leave fallbackUsed=false") {
+    // Structural-witness test: frame is `final`, can't inject a throwing
+    // fallback. `fallbackUsed = true` is set ONLY after `fallback.prepare`
+    // returns normally (see SegmentTreeWindowFunctionFrame.prepare). A
+    // regression of that ordering would fail T1d. This test re-exercises
+    // the happy path and documents the invariant.
+    val conf = new SQLConf
+    conf.setConfString(SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key, "100")
+    SegmentTreeWindowTestHelper.withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      frame.prepare(factory.newArray(rows = 5))
+      assert(frame.fallbackUsed, "happy-path post-condition")
+    }
+  }
+
+  test("lifecycle: close() after only small partition is a no-op") {
+    val conf = new SQLConf
+    conf.setConfString(SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key, "100")
+    SegmentTreeWindowTestHelper.withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      frame.prepare(factory.newArray(rows = 5))
+      assert(frame.fallbackUsed)
+      // Idempotent: no tree was ever built.
+      frame.close()
+      frame.close()
+    }
+  }
+}
+
+/** Legacy UDAF wrapped as ScalaUDAF (ImperativeAggregate); rejected by segtree guard. */
+private class LegacySumUdaf extends UserDefinedAggregateFunction {
+  override def inputSchema: StructType = new StructType().add("v", LongType)
+  override def bufferSchema: StructType = new StructType().add("s", LongType)
+  override def dataType: DataType = LongType
+  override def deterministic: Boolean = true
+  override def initialize(buffer: MutableAggregationBuffer): Unit = {
+    buffer(0) = 0L
+  }
+  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+    if (!input.isNullAt(0)) {
+      buffer(0) = buffer.getLong(0) + input.getLong(0)
+    }
+  }
+  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
+  }
+  override def evaluate(buffer: Row): Any = buffer.getLong(0)
+}
+
+/** Typed Aggregator wrapped as ScalaAggregator (TypedImperativeAggregate); rejected. */
+private class LongSumAggregator extends Aggregator[Long, Long, Long] {
+  override def zero: Long = 0L
+  override def reduce(b: Long, a: Long): Long = b + a
+  override def merge(b1: Long, b2: Long): Long = b1 + b2
+  override def finish(r: Long): Long = r
+  override def bufferEncoder: Encoder[Long] = Encoders.scalaLong
+  override def outputEncoder: Encoder[Long] = Encoders.scalaLong
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowMetricsSuite.scala
new file mode 100644
index 0000000000000..5c3edb7e06c68
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowMetricsSuite.scala
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.execution.SparkPlanInfo
+import org.apache.spark.sql.execution.metric.SQLMetricsTestUtils
+import org.apache.spark.sql.execution.ui.SparkPlanGraph
+import org.apache.spark.sql.expressions.Window
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * SQLMetrics visibility coverage for [[SegmentTreeWindowFunctionFrame]]:
+ * segtree path bumps `numSegmentTreeFrames`; fallback path bumps
+ * `numSegmentTreeFallbackFrames`; feature-flag off leaves both at 0.
+ */
+class SegmentTreeWindowMetricsSuite
+    extends QueryTest with SharedSparkSession with SQLMetricsTestUtils {
+
+  import testImplicits._
+
+  /**
+   * Run `df` and return the first Window node's seg-tree counter values as
+   * Long. (Other metrics like `spillSize` are UI-pretty-printed strings and
+   * are skipped.)
+   */
+  private def windowMetricValues(df: org.apache.spark.sql.DataFrame): Map[String, Long] = {
+    val previousExecutionIds = currentExecutionIds()
+    df.collect()
+    sparkContext.listenerBus.waitUntilEmpty(10000)
+    val executionId = currentExecutionIds().diff(previousExecutionIds).head
+    val metricValues = statusStore.executionMetrics(executionId)
+    val graph = SparkPlanGraph(SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan))
+    val windowNode = graph.allNodes.find(_.name == "Window").getOrElse {
+      fail(s"No Window node in plan:\n${df.queryExecution.executedPlan}")
+    }
+    val wanted = Set(
+      "number of segment-tree frames prepared",
+      "number of segment-tree fallback frames prepared")
+    windowNode.metrics.filter(m => wanted.contains(m.name)).map { m =>
+      // UI value may be "3" or "total (min, med, max ...)"; first int run is the total.
+      val raw = metricValues(m.accumulatorId)
+      val total = "-?\\d+".r.findFirstIn(raw).getOrElse {
+        fail(s"Could not parse integer from metric '${m.name}' value: $raw")
+      }
+      m.name -> total.toLong
+    }.toMap
+  }
+
+  private def baseDF = spark.range(0, 120).selectExpr(
+    "id", "(id % 3) AS pk", "CAST(id AS INT) AS v")
+
+  private val winSpec = Window.partitionBy($"pk").orderBy($"id").rowsBetween(-3, 3)
+
+  test("segment-tree path increments numSegmentTreeFrames (one per frame per partition)") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "1",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      // Two aggregates share one Window -> one frame; 3 partitions => 3 frames.
+      val df = baseDF.select($"id",
+        min($"v").over(winSpec).as("mn"),
+        max($"v").over(winSpec).as("mx"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree frames prepared") === 3L,
+        s"expected 3 segtree frames (one per partition), got metrics = $m")
+      assert(m("number of segment-tree fallback frames prepared") === 0L,
+        s"fallback counter must be 0 when all partitions take segtree path, got $m")
+    }
+  }
+
+  test("fallback path increments numSegmentTreeFallbackFrames") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        // Threshold > partition size (40 rows/partition) forces fallback on every partition.
+        SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "1000",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      val df = baseDF.select($"id", min($"v").over(winSpec).as("mn"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree fallback frames prepared") === 3L,
+        s"expected 3 fallback frames (one per partition under threshold), got $m")
+      assert(m("number of segment-tree frames prepared") === 0L,
+        s"segtree counter must be 0 when all partitions fall back, got $m")
+    }
+  }
+
+  test("feature flag off: both segment-tree counters stay at zero") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "false",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      val df = baseDF.select($"id", min($"v").over(winSpec).as("mn"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree frames prepared") === 0L,
+        s"segtree counter must be 0 when feature flag disabled, got $m")
+      assert(m("number of segment-tree fallback frames prepared") === 0L,
+        s"fallback counter must be 0 when feature flag disabled, got $m")
+    }
+  }
+
+  // T1-T4: regression for the removed idempotency guard in
+  // `SegmentTreeWindowFunctionFrame.prepare()`. The old guard keyed on
+  // `(System.identityHashCode(rows), rows.length)`, but `WindowPartitionEvaluator`
+  // reuses a single `ExternalAppendOnlyUnsafeRowArray` across all partitions in
+  // a task, so the identity hash is constant and the key collapsed to
+  // `rows.length` -- silently deduping consecutive equal-length partitions.
+  // All four tests pin `minPartitionRows = 64` and use a single shuffle
+  // partition so the Window operator sees `numPartitions > numTasks` -- the
+  // exact shape the existing segtree/fallback fixtures happened to avoid.
+
+  test("T1 (G1) numPartitions > numTasks, identical length: every partition counted") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "64",
+        SQLConf.SHUFFLE_PARTITIONS.key -> "1",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      // 3 window partitions x 100 rows, all >= 64 => segtree, one task.
+      val df = spark.range(0, 300).selectExpr("id", "(id % 3) AS pk", "CAST(id AS INT) AS v")
+        .select($"id", min($"v").over(
+          Window.partitionBy($"pk").orderBy($"id").rowsBetween(-3, 3)).as("mn"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree frames prepared") === 3L,
+        s"expected 3 segtree frames (one per window partition), got $m")
+      assert(m("number of segment-tree fallback frames prepared") === 0L, s"got $m")
+    }
+  }
+
+  test("T2 (G2) identical-length partitions across keys") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "64",
+        SQLConf.SHUFFLE_PARTITIONS.key -> "1",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      // Two distinct keys, each exactly 200 rows (> 64 => segtree).
+      // Old guard would dedupe the second because `rows.length` matches.
+      val df = spark.range(0, 400)
+        .selectExpr("id", "CASE WHEN id < 200 THEN 'a' ELSE 'b' END AS pk",
+          "CAST(id AS INT) AS v")
+        .select($"id", min($"v").over(
+          Window.partitionBy($"pk").orderBy($"id").rowsBetween(-3, 3)).as("mn"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree frames prepared") === 2L,
+        s"expected 2 segtree frames (one per key, both length 200), got $m")
+      assert(m("number of segment-tree fallback frames prepared") === 0L, s"got $m")
+    }
+  }
+
+  test("T3 (G3) all-length-1 unique keys, fallback path: every partition counted") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "64",
+        SQLConf.SHUFFLE_PARTITIONS.key -> "1",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      // 100 unique keys, 1 row each, all < 64 => fallback. Old guard
+      // would collapse to 1 because every partition has length 1.
+      val df = spark.range(0, 100).selectExpr("id AS pk", "CAST(id AS INT) AS v")
+        .select($"pk", min($"v").over(
+          Window.partitionBy($"pk").orderBy($"pk").rowsBetween(-3, 3)).as("mn"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree fallback frames prepared") === 100L,
+        s"expected 100 fallback frames (one per unique-key partition), got $m")
+      assert(m("number of segment-tree frames prepared") === 0L, s"got $m")
+    }
+  }
+
+  test("T4 (G4) mixed segtree + fallback, non-aliasing order") {
+    withSQLConf(
+        SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+        SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "64",
+        SQLConf.SHUFFLE_PARTITIONS.key -> "1",
+        SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+      // 4 window partitions with lengths (50, 50, 100, 100) in sort order.
+      // 50 < 64 => fallback; 100 >= 64 => segtree. Under the old guard,
+      // consecutive same-length partitions collapsed => (fb=1, seg=1);
+      // after the fix => (fb=2, seg=2).
+      // TRAP: an alternating (50,100,50,100) ordering coincidentally gives
+      // (2,2) under both code paths (key changes every step) -- false
+      // positive, do not use. Size-monotone ordering is required.
+      val df = spark.range(0, 300)
+        .selectExpr(
+          "id",
+          // key ordering k1<k2<k3<k4 matches size-monotone (50,50,100,100)
+          "CASE WHEN id < 50 THEN 'k1' " +
+            "WHEN id < 100 THEN 'k2' " +
+            "WHEN id < 200 THEN 'k3' " +
+            "ELSE 'k4' END AS pk",
+          "CAST(id AS INT) AS v")
+        .select($"id", min($"v").over(
+          Window.partitionBy($"pk").orderBy($"id").rowsBetween(-3, 3)).as("mn"))
+      val m = windowMetricValues(df)
+      assert(m("number of segment-tree frames prepared") === 2L,
+        s"expected 2 segtree frames (k3, k4 @ 100 rows each), got $m")
+      assert(m("number of segment-tree fallback frames prepared") === 2L,
+        s"expected 2 fallback frames (k1, k2 @ 50 rows each), got $m")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowTestHelper.scala
new file mode 100644
index 0000000000000..cd5237c9b310f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/SegmentTreeWindowTestHelper.scala
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.apache.spark.{SparkEnv, TaskContext}
+import org.apache.spark.memory.MemoryTestingUtils
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.Sum
+import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection
+import org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.IntegerType
+
+/**
+ * Shared helpers for segment-tree window-frame tests. Kept in the same
+ * package so tests can reach `private[window]` hooks on
+ * [[SegmentTreeWindowFunctionFrame]] (see contract Section 1.3).
+ */
+private[window] object SegmentTreeWindowTestHelper {
+
+  /**
+   * Fake-TaskContext plumbing for lifecycle tests. Gives `body` a factory
+   * producing `newArray(rows)` and `newFrame()` (unprepared — caller drives
+   * prepare/write/close). Tracks frames so teardown always closes them.
+   */
+  def withFrameFactory(conf: SQLConf)
+      (body: FrameFactory => Unit): Unit = {
+    val existing = TaskContext.get()
+    val owned = existing == null
+    val tc = if (owned) {
+      val fake = MemoryTestingUtils.fakeTaskContext(SparkEnv.get)
+      TaskContext.setTaskContext(fake)
+      fake
+    } else existing
+    val factory = new FrameFactory(conf, tc)
+    try body(factory) finally {
+      factory.closeAll()
+      if (owned) TaskContext.unset()
+    }
+  }
+
+  final class FrameFactory(conf: SQLConf, tc: TaskContext) {
+    private val attr = AttributeReference("v", IntegerType, nullable = false)()
+    private val input = Seq[Attribute](attr)
+    private val fn = Sum(attr)
+    private val bufAttrs = fn.aggBufferAttributes
+    private val processor = AggregateProcessor(
+      Array[Expression](fn),
+      0,
+      input,
+      (es, s) => GenerateMutableProjection.generate(es, s),
+      Array[Option[Expression]](None))
+    private val unsafeProj = UnsafeProjection.create(Array(attr.dataType))
+    private val opened = scala.collection.mutable.Buffer[AutoCloseable]()
+
+    def newArray(rows: Int): ExternalAppendOnlyUnsafeRowArray = {
+      val array = new ExternalAppendOnlyUnsafeRowArray(
+        tc.taskMemoryManager(),
+        SparkEnv.get.blockManager,
+        SparkEnv.get.serializerManager,
+        tc,
+        1024,
+        SparkEnv.get.memoryManager.pageSizeBytes,
+        Int.MaxValue,
+        Long.MaxValue,
+        Int.MaxValue,
+        Long.MaxValue)
+      var i = 0
+      while (i < rows) {
+        array.add(unsafeProj(new GenericInternalRow(Array[Any](i))))
+        i += 1
+      }
+      array
+    }
+
+    /** Create a new frame. Caller owns lifecycle unless tracked via `track()`. */
+    def newFrame(): SegmentTreeWindowFunctionFrame = {
+      val target = new SpecificInternalRow(Seq(bufAttrs.head.dataType))
+      val frame = new SegmentTreeWindowFunctionFrame(
+        target,
+        processor,
+        Array(fn),
+        input,
+        RowFrame,
+        RowBoundOrdering(-1),
+        RowBoundOrdering(1),
+        (es, s) => GenerateMutableProjection.generate(es, s),
+        conf,
+        None,
+        tc.taskMemoryManager())
+      track(frame)
+      frame
+    }
+
+    def track[T <: AutoCloseable](c: T): T = { opened += c; c }
+
+    def closeAll(): Unit = {
+      opened.foreach { c =>
+        try c.close() catch { case _: Throwable => () }
+      }
+      opened.clear()
+    }
+  }
+
+  /**
+   * Build a small-partition [[SegmentTreeWindowFunctionFrame]] (Sum over one
+   * IntegerType column, values 0..rows-1, frame -1..+1), drive it through
+   * all rows, then hand it to `body`. Manages a fake TaskContext if needed
+   * and always closes the frame.
+   */
+  def withSmallPartitionFrame(conf: SQLConf, rows: Int)
+      (body: SegmentTreeWindowFunctionFrame => Unit): Unit = {
+    withFrameFactory(conf) { factory =>
+      val frame = factory.newFrame()
+      val array = factory.newArray(rows)
+      frame.prepare(array)
+      var j = 0
+      while (j < rows) {
+        frame.write(j, InternalRow.empty)
+        j += 1
+      }
+      body(frame)
+    }
+  }
+
+  /**
+   * Build the given [[WindowSegmentTree]] from a row iterator. After the
+   * buffer-ownership flip the caller (this helper) materialises an
+   * [[ExternalAppendOnlyUnsafeRowArray]], projects rows via
+   * [[UnsafeProjection]], and hands it to the tree. The tree retains the
+   * array; close is idempotent via `tree.close()` / partition teardown.
+   *
+   * `inMemoryThreshold` / `spillThreshold` let tests exercise the spill
+   * path (see `WindowSegmentTreeSuite` D9).
+   */
+  def buildTreeFromIter(
+      tree: WindowSegmentTree,
+      rows: Iterator[InternalRow],
+      inputSchema: Seq[Attribute],
+      inMemoryThreshold: Int = Int.MaxValue,
+      spillThreshold: Int = Int.MaxValue): Unit = {
+    val tc = TaskContext.get()
+    require(tc != null, "buildTreeFromIter requires an active TaskContext")
+    val env = SparkEnv.get
+    val array = new ExternalAppendOnlyUnsafeRowArray(
+      tc.taskMemoryManager(),
+      env.blockManager,
+      env.serializerManager,
+      tc,
+      1024,
+      env.memoryManager.pageSizeBytes,
+      inMemoryThreshold,
+      Long.MaxValue,
+      spillThreshold,
+      Long.MaxValue)
+    val proj = UnsafeProjection.create(inputSchema.map(_.dataType).toArray)
+    rows.foreach(r => array.add(proj(r)))
+    tree.build(array)
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeAllowlistSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeAllowlistSuite.scala
new file mode 100644
index 0000000000000..d3da8e40c85a4
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeAllowlistSuite.scala
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.execution.SparkPlanInfo
+import org.apache.spark.sql.execution.metric.SQLMetricsTestUtils
+import org.apache.spark.sql.execution.ui.SparkPlanGraph
+import org.apache.spark.sql.expressions.Window
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Coverage for the explicit segment-tree aggregate allowlist
+ * ([[WindowSegmentTree.EligibleAggregates]]):
+ *   - allowlisted aggregates route to segtree (`numSegmentTreeFrames` bumps).
+ *   - non-allowlisted aggregates fall through to the sliding path without
+ *     crashing or producing wrong results (segtree counters stay at 0).
+ * Eligibility gating only; exhaustive equivalence lives in
+ * [[SegmentTreeWindowFunctionSuite]].
+ */
+class WindowSegmentTreeAllowlistSuite
+    extends QueryTest with SharedSparkSession with SQLMetricsTestUtils {
+
+  import testImplicits._
+
+  private val enableSegTree: Map[String, String] = Map(
+    SQLConf.WINDOW_SEGMENT_TREE_ENABLED.key -> "true",
+    SQLConf.WINDOW_SEGMENT_TREE_MIN_PARTITION_ROWS.key -> "1")
+
+  private def baseDF = spark.range(0, 120).selectExpr(
+    "id", "(id % 3) AS pk", "CAST(id AS INT) AS v", "CAST(id AS DOUBLE) AS vd")
+
+  private val winSpec = Window.partitionBy($"pk").orderBy($"id").rowsBetween(-3, 3)
+
+  private def segTreeCounters(df: DataFrame): (Long, Long) = {
+    val previousExecutionIds = currentExecutionIds()
+    df.collect()
+    sparkContext.listenerBus.waitUntilEmpty(10000)
+    val executionId = currentExecutionIds().diff(previousExecutionIds).head
+    val metricValues = statusStore.executionMetrics(executionId)
+    val graph = SparkPlanGraph(SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan))
+    val windowNode = graph.allNodes.find(_.name == "Window").getOrElse {
+      fail(s"No Window node in plan:\n${df.queryExecution.executedPlan}")
+    }
+    def total(name: String): Long = {
+      val mOpt = windowNode.metrics.find(_.name == name)
+      mOpt.map { m =>
+        val raw = metricValues(m.accumulatorId)
+        "-?\\d+".r.findFirstIn(raw).map(_.toLong).getOrElse(0L)
+      }.getOrElse(0L)
+    }
+    (total("number of segment-tree frames prepared"),
+      total("number of segment-tree fallback frames prepared"))
+  }
+
+  // Positive: allowlisted aggregates route to segtree
+
+  Seq(
+    ("min", (c: org.apache.spark.sql.Column) => min(c)),
+    ("max", (c: org.apache.spark.sql.Column) => max(c)),
+    ("sum", (c: org.apache.spark.sql.Column) => sum(c)),
+    ("count", (c: org.apache.spark.sql.Column) => count(c)),
+    ("avg", (c: org.apache.spark.sql.Column) => avg(c)),
+    ("stddev_pop", (c: org.apache.spark.sql.Column) => stddev_pop(c)),
+    ("stddev_samp", (c: org.apache.spark.sql.Column) => stddev_samp(c)),
+    ("var_pop", (c: org.apache.spark.sql.Column) => var_pop(c)),
+    ("var_samp", (c: org.apache.spark.sql.Column) => var_samp(c))
+  ).foreach { case (name, fn) =>
+    test(s"$name routes to the segment-tree path") {
+      withSQLConf(enableSegTree.toSeq: _*) {
+        val df = baseDF.withColumn("agg", fn($"vd").over(winSpec))
+        val (seg, fallback) = segTreeCounters(df)
+        assert(seg > 0, s"$name should bump numSegmentTreeFrames (got $seg)")
+        assert(fallback == 0,
+          s"$name should not fall back (fallback counter: $fallback)")
+      }
+    }
+  }
+
+  // Negative: non-allowlisted aggregates fall through
+
+  test("first_value falls through (order-dependent aggregate)") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF.withColumn("agg", first($"v").over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      assert(seg == 0, s"first_value should not use segment tree (got $seg frames)")
+    }
+  }
+
+  test("last_value falls through (order-dependent aggregate)") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF.withColumn("agg", last($"v").over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      assert(seg == 0, s"last_value should not use segment tree (got $seg frames)")
+    }
+  }
+
+  test("collect_list falls through (unbounded buffer)") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF.withColumn("agg", collect_list($"v").over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      assert(seg == 0, s"collect_list should not use segment tree (got $seg frames)")
+    }
+  }
+
+  test("collect_set falls through (unbounded buffer)") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF.withColumn("agg", collect_set($"v").over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      assert(seg == 0, s"collect_set should not use segment tree (got $seg frames)")
+    }
+  }
+
+  test("approx_count_distinct (HyperLogLog++) falls through (fail-closed)") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF.withColumn("agg", approx_count_distinct($"v").over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      assert(seg == 0,
+        s"approx_count_distinct is intentionally not on the allowlist (got $seg frames)")
+    }
+  }
+
+  test("percentile_approx falls through (sketch buffer not auditable)") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF.withColumn(
+        "agg", percentile_approx($"vd", lit(0.5), lit(100)).over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      assert(seg == 0, s"percentile_approx should not use segment tree (got $seg frames)")
+    }
+  }
+
+  // Gate: aggregates carrying a FILTER (WHERE ...) clause fall through.
+  // `eligibleForSegTree` requires `filters.forall(_.isEmpty)` because the
+  // segment-tree combine contract is defined over the unfiltered partial
+  // buffer. A defensive regression: if any future analyzer rule ever rewrites
+  // `AGG(x) FILTER (WHERE p)` in a way that strips `AggregateExpression.filter`
+  // (e.g., pushing the predicate into the aggregate function), this test
+  // fails and forces an explicit eligibility review.
+  test("FILTER (WHERE ...) disables segment-tree path") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      withTempView("t") {
+        baseDF.createOrReplaceTempView("t")
+        val df = spark.sql(
+          """SELECT id, pk, v,
+            |  sum(v) FILTER (WHERE v % 2 = 0)
+            |    OVER (PARTITION BY pk ORDER BY id ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING)
+            |    AS filtered_sum
+            |FROM t""".stripMargin)
+        val (seg, _) = segTreeCounters(df)
+        assert(seg == 0,
+          s"filtered aggregate must not take segment-tree path (got $seg segtree frames)")
+      }
+    }
+  }
+
+  // Mixed: ANY non-eligible aggregate disqualifies the group
+
+  test("mix of allowlisted + non-allowlisted aggregates falls through entirely") {
+    withSQLConf(enableSegTree.toSeq: _*) {
+      val df = baseDF
+        .withColumn("s", sum($"v").over(winSpec))
+        .withColumn("fv", first($"v").over(winSpec))
+      val (seg, _) = segTreeCounters(df)
+      // Both aggregates share the same Window node; gating is forall(isEligible),
+      // so `first_value` drops the whole group.
+      assert(seg == 0,
+        s"Window group containing a non-allowlisted agg must fall through (got $seg)")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeMemorySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeMemorySuite.scala
new file mode 100644
index 0000000000000..e60253042c178
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeMemorySuite.scala
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import java.util.Properties
+
+import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TaskContext, TaskContextImpl}
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.memory.{MemoryConsumer, MemoryMode, SparkOutOfMemoryError, TaskMemoryManager, TestMemoryManager}
+import org.apache.spark.metrics.MetricsSystem
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow, MutableProjection, SpecificInternalRow}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{DeclarativeAggregate, Min}
+import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection
+import org.apache.spark.sql.types.IntegerType
+
+/**
+ * Memory-manager integration tests for [[WindowSegmentTree]]. Covers:
+ *  - `SegTreeSpiller` registration with `TaskMemoryManager`
+ *  - `acquireBlockMemory` grant / partial-grant rollback
+ *  - `evictUntil` LRU eviction driven by TMM pressure
+ *  - `spill()` self-trigger short-circuit and rowArray-spilled fall-through
+ *  - task completion / kill listener releasing all cached blocks
+ * T5 (rowArray-spilled priority) and T8 (task-kill listener) are kept as
+ * `ignore`d stubs so the matrix stays visible; each documents what it needs.
+ */
+class WindowSegmentTreeMemorySuite extends SparkFunSuite with LocalSparkContext {
+
+  // common fixtures
+
+  private val inputAttr: AttributeReference =
+    AttributeReference("v", IntegerType, nullable = true)()
+  private val inputSchema: Seq[Attribute] = Seq(inputAttr)
+
+  private def newMutableProjection
+      : (Seq[Expression], Seq[Attribute]) => MutableProjection =
+    (exprs, attrs) => GenerateMutableProjection.generate(exprs, attrs)
+
+  private def minAgg: DeclarativeAggregate = Min(inputAttr)
+
+  /**
+   * Standalone `TaskMemoryManager` backed by `TestMemoryManager` (not routed
+   * through `SparkEnv`), with a matching `TaskContextImpl` installed.
+   * Restores the previous `TaskContext` on exit.
+   *
+   * @param budget  initial execution memory; `Long.MaxValue` for T2,
+   *                tight value (e.g. 2 * blockBytes) for T3.
+   * @param offHeap when true, enables Tungsten off-heap mode (T9).
+   */
+  private def withTmm[T](budget: Long = Long.MaxValue, offHeap: Boolean = false)
+      (body: (TaskMemoryManager, TestMemoryManager) => T): T = {
+    val conf = new SparkConf(false)
+      .set("spark.memory.offHeap.enabled", offHeap.toString)
+      .set("spark.memory.offHeap.size", "1048576")
+    if (sc == null) {
+      sc = new SparkContext("local", "WindowSegmentTreeMemorySuite", conf)
+    }
+    val mm = new TestMemoryManager(conf)
+    mm.limit(budget)
+    val tmm = new TaskMemoryManager(mm, 0)
+    val prev = TaskContext.get()
+    val tc = new TaskContextImpl(
+      stageId = 0, stageAttemptNumber = 0, partitionId = 0,
+      taskAttemptId = 0, attemptNumber = 0, numPartitions = 1,
+      taskMemoryManager = tmm,
+      localProperties = new Properties,
+      metricsSystem = null.asInstanceOf[MetricsSystem],
+      taskMetrics = TaskMetrics.empty,
+      cpus = 1)
+    TaskContext.setTaskContext(tc)
+    try body(tmm, mm)
+    finally {
+      if (prev != null) TaskContext.setTaskContext(prev) else TaskContext.unset()
+    }
+  }
+
+  private def buildTree(
+      tmm: TaskMemoryManager,
+      values: Seq[Int],
+      fanout: Int = 4,
+      blockSize: Int = 8,
+      maxCachedBlocks: Option[Int] = Some(2)): WindowSegmentTree = {
+    val tree = new WindowSegmentTree(
+      Array(minAgg), inputSchema, newMutableProjection,
+      fanout = fanout, blockSize = blockSize,
+      maxCachedBlocks = maxCachedBlocks,
+      taskMemoryManager = tmm)
+    val rows = values.iterator.map { v =>
+      val r = new GenericInternalRow(1); r.update(0, v); r.asInstanceOf[InternalRow]
+    }
+    SegmentTreeWindowTestHelper.buildTreeFromIter(tree, rows, inputSchema)
+    tree
+  }
+
+  private def newOutBuffer(): SpecificInternalRow =
+    new SpecificInternalRow(Seq[org.apache.spark.sql.types.DataType](IntegerType))
+
+  private def queryMin(tree: WindowSegmentTree, lo: Int, hi: Int): Any = {
+    val out = newOutBuffer()
+    tree.query(lo, hi, out)
+    if (out.isNullAt(0)) null else out.getInt(0)
+  }
+
+  private def naiveMin(vs: Seq[Int], lo: Int, hi: Int): Any =
+    if (lo >= hi) null else vs.slice(lo, hi).min
+
+  test("T1 constructor rejects null TaskMemoryManager") {
+    val ex = intercept[IllegalArgumentException] {
+      new WindowSegmentTree(
+        Array(minAgg), inputSchema, newMutableProjection,
+        taskMemoryManager = null)
+    }
+    assert(ex.getMessage.contains("non-null TaskMemoryManager"))
+  }
+
+  test("T2 ample budget: query correctness and positive memory usage") {
+    withTmm() { (tmm, _) =>
+      val values = Seq(5, 2, 9, 1, 7, 3, 4, 8, 6, 0, 11, 12, 13, 14, 15, 16)
+      val tree = buildTree(tmm, values, fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(4))
+      try {
+        // Force block-level queries so the LRU actually populates.
+        assert(queryMin(tree, 0, values.length) == values.min)
+        assert(queryMin(tree, 1, 14) == naiveMin(values, 1, 14))
+        assert(queryMin(tree, 5, 11) == naiveMin(values, 5, 11))
+        // Positive consumption: at least one block level cached -> spiller acquired.
+        assert(tmm.getMemoryConsumptionForThisTask > 0L,
+          "Expected positive memory consumption after caching block levels")
+      } finally tree.close()
+      // All bytes released after close.
+      assert(tmm.getMemoryConsumptionForThisTask == 0L,
+        "Memory consumption must return to 0 after close()")
+    }
+  }
+
+  test("T3 tight budget forces spill: results still match baseline") {
+    val values = (0 until 40).map(i => (i * 37 + 11) % 97)
+    // Baseline under ample budget.
+    val baseline: Seq[Any] = {
+      var captured: Seq[Any] = Seq.empty
+      withTmm() { (tmm, _) =>
+        val tree = buildTree(tmm, values, fanout = 4, blockSize = 4,
+          maxCachedBlocks = Some(3))
+        try {
+          captured = (0 to values.length by 5).map(i => queryMin(tree, 0, i))
+        } finally {
+          tree.close()
+        }
+      }
+      captured
+    }
+    // Tight budget: ~1 block headroom -> any new block-level load evicts the prior.
+    // Sizing: (fanout=4, blockSize=4) -> level shape [4, 1] slots;
+    // at bufferWidth=16 B/slot blockBytes = 5 * 16 = 80 B.
+    // budget=128 B admits exactly one block and forces spill on the second.
+    withTmm(budget = 128L) { (tmm, _) =>
+      val tree = buildTree(tmm, values, fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(3))
+      try {
+        val observed = (0 to values.length by 5).map(i => queryMin(tree, 0, i))
+        assert(observed == baseline,
+          s"spill-path answers diverged: observed=$observed baseline=$baseline")
+      } finally tree.close()
+      assert(tmm.getMemoryConsumptionForThisTask == 0L)
+    }
+  }
+
+  test("T4 self-trigger: spill(_, this) returns 0 and does not evict") {
+    withTmm() { (tmm, _) =>
+      val tree = buildTree(tmm, (0 until 16), fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(3))
+      try {
+        // Warm the cache.
+        assert(queryMin(tree, 0, 16) == 0)
+        val before = tmm.getMemoryConsumptionForThisTask
+        assert(before > 0L, "precondition: some block levels must be cached")
+        val spiller = tree.testOnlySpiller()
+        val freed = spiller.spill(Long.MaxValue, spiller)
+        assert(freed == 0L, s"self-trigger spill must return 0L, got $freed")
+        val after = tmm.getMemoryConsumptionForThisTask
+        assert(after == before,
+          s"cache size must not change on self-trigger (before=$before after=$after)")
+      } finally tree.close()
+    }
+  }
+
+  test("T6 close() is idempotent and releases all acquired bytes") {
+    withTmm() { (tmm, _) =>
+      val tree = buildTree(tmm, (0 until 20), fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(3))
+      assert(queryMin(tree, 0, 20) == 0)
+      val peak = tmm.getMemoryConsumptionForThisTask
+      assert(peak > 0L)
+      tree.close()
+      assert(tmm.getMemoryConsumptionForThisTask == 0L,
+        "Memory must be fully released after first close()")
+      // Second close must be a no-op.
+      tree.close()
+      assert(tmm.getMemoryConsumptionForThisTask == 0L,
+        "Second close() must remain a no-op")
+    }
+  }
+
+  test("T7 prepare mid-way failure releases all previously acquired blocks") {
+    // Inject failure by pre-setting consequentOOM to a moderate N so
+    // acquireMemory returns 0 after the first few successful blocks.
+    withTmm() { (tmm, mm) =>
+      val values = (0 until 40).map(i => 40 - i)
+      val tree = buildTree(tmm, values, fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(10))
+      try {
+        // Warm up: populate cache entries.
+        assert(queryMin(tree, 0, 40) == 1)
+        // Force next acquireMemory calls to fail, then trigger cold-cache via
+        // evict + re-acquire. Release everything so the next query re-acquires.
+        val spiller = tree.testOnlySpiller()
+        spiller.spill(Long.MaxValue, new MemoryConsumer(tmm,
+            tmm.pageSizeBytes(), MemoryMode.ON_HEAP) {
+          override def spill(size: Long, trigger: MemoryConsumer): Long = 0L
+        })
+        assert(tmm.getMemoryConsumptionForThisTask == 0L,
+          "after full spill, accounting must be zero")
+        // Force next N acquireMemory calls to grant 0 bytes (hard OOM).
+        // With maxCachedBlocks=10 and a cold cache, ensureBlockLevels will
+        // call acquireBlockMemory; initial grant and post-evict retry both
+        // see 0 -> SparkOutOfMemoryError.
+        mm.markConsequentOOM(10)
+        val ex = intercept[SparkOutOfMemoryError](queryMin(tree, 0, 20))
+        assert(ex.getMessage.contains("UNABLE_TO_ACQUIRE_MEMORY"),
+          s"unexpected OOM message: ${ex.getMessage}")
+        // Failed acquire must not leak bytes against the task.
+        assert(tmm.getMemoryConsumptionForThisTask == 0L,
+          "After failed acquire, accounting must be zero (no partial leaks)")
+      } finally tree.close()
+      assert(tmm.getMemoryConsumptionForThisTask == 0L)
+    }
+  }
+
+  test("T9 ON_HEAP Tungsten: spiller mode is ON_HEAP") {
+    withTmm(offHeap = false) { (tmm, _) =>
+      val tree = buildTree(tmm, (0 until 16), fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(2))
+      try {
+        assert(queryMin(tree, 0, 16) == 0)
+        assert(tree.testOnlySpiller().getMode == MemoryMode.ON_HEAP)
+        // In ON_HEAP Tungsten, our hardcoded mode also coincides with
+        // `tmm.getTungstenMemoryMode`, so segtree and rowArray share the
+        // pool and I8 (rowArray-spilled short-circuit) fires.
+        assert(tree.testOnlySpiller().getMode == tmm.getTungstenMemoryMode)
+      } finally tree.close()
+    }
+  }
+
+  test("T9 OFF_HEAP Tungsten: spiller mode stays ON_HEAP (not OFF_HEAP)") {
+    withTmm(offHeap = true) { (tmm, _) =>
+      val tree = buildTree(tmm, (0 until 16), fanout = 4, blockSize = 4,
+        maxCachedBlocks = Some(2))
+      try {
+        assert(queryMin(tree, 0, 16) == 0)
+        // Segtree's cache is JVM-heap (`SpecificInternalRow` /
+        // `Array[Array[InternalRow]]`), never Tungsten pages. The spiller
+        // must stay ON_HEAP regardless of `spark.memory.offHeap.enabled`,
+        // or `spill(n)` would phantom-credit the off-heap pool and
+        // violate TMM's same-pool spill contract. Mirrors `Spillable`.
+        assert(tree.testOnlySpiller().getMode == MemoryMode.ON_HEAP)
+        assert(tmm.getTungstenMemoryMode == MemoryMode.OFF_HEAP)
+        assert(tree.testOnlySpiller().getMode != tmm.getTungstenMemoryMode)
+      } finally tree.close()
+    }
+  }
+
+  test("T10 blockBytes oracle: hand-computed table + runtime-anchored cross-check") {
+    // Part A: hand-computed golden table. Values derived by listing the
+    // cached level widths from buildBlockLevels * 16 B/field. Independent of
+    // production `cachedSlotsPerBlock` -- fencepost or dropped-level
+    // regressions flip at least one case. Cases chosen to cover:
+    //   blockSize == 1    (single leaf, no parents), (F=3,B=5) and (F=4,B=7)
+    //   (non-power-of-fanout asymmetric tails), (F=16,B=65536) (deep tree).
+    // Level breakdown (leaves + parents + ... + root):
+    //   (F=4,  B=1)     -> 1                              =    1 slot
+    //   (F=4,  B=4)     -> 4 + 1                          =    5 slots
+    //   (F=3,  B=5)     -> 5 + 2 + 1                      =    8 slots
+    //   (F=4,  B=7)     -> 7 + 2 + 1                      =   10 slots
+    //   (F=8,  B=16)    -> 16 + 2 + 1                     =   19 slots
+    //   (F=16, B=256)   -> 256 + 16 + 1                   =  273 slots
+    //   (F=16, B=65536) -> 65536 + 4096 + 256 + 16 + 1    = 69905 slots
+    val expectedSlots: Seq[((Int, Int), Long)] = Seq(
+      (4, 1) -> 1L,
+      (4, 4) -> 5L,
+      (3, 5) -> 8L,
+      (4, 7) -> 10L,
+      (8, 16) -> 19L,
+      (16, 256) -> 273L,
+      (16, 65536) -> 69905L)
+
+    withTmm() { (tmm, _) =>
+      for (((fanout, blockSize), slots) <- expectedSlots) {
+        val tree = buildTree(tmm, Seq(1, 2, 3, 4, 5, 6, 7, 8),
+          fanout = fanout, blockSize = blockSize, maxCachedBlocks = Some(1))
+        try {
+          // Min on one IntegerType field -> 1 buffer field x 16 B.
+          val expected = math.max(1L, slots * 16L)
+          assert(tree.peekBlockBytes == expected,
+            s"blockBytes mismatch at (F=$fanout, blockSize=$blockSize): " +
+              s"got=${tree.peekBlockBytes} expected=$expected (slots=$slots)")
+        } finally tree.close()
+      }
+    }
+
+    // Part B: runtime-anchored cross-check. Build a block, read actual cached
+    // level array lengths via peek hooks, assert
+    // `blockBytes == sum(levels) * bufferWidth`. Guards against the formula
+    // and buildBlockLevels drifting apart.
+    withTmm() { (tmm, _) =>
+      val fanout = 4
+      val blockSize = 8
+      val tree = buildTree(tmm, (0 until blockSize),
+        fanout = fanout, blockSize = blockSize, maxCachedBlocks = Some(1))
+      try {
+        // Materialize block 0.
+        assert(queryMin(tree, 0, blockSize) == 0)
+        val levelCount = tree.peekLevelCount(0)
+        var slotSum = 0L
+        var lvl = 0
+        while (lvl < levelCount) {
+          slotSum += tree.peekLevelSize(0, lvl)
+          lvl += 1
+        }
+        val expected = math.max(1L, slotSum * 16L)
+        assert(tree.peekBlockBytes == expected,
+          s"runtime-anchored oracle mismatch: slots=$slotSum " +
+            s"expected=$expected got=${tree.peekBlockBytes}")
+      } finally tree.close()
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreePropertySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreePropertySuite.scala
new file mode 100644
index 0000000000000..fc92fbed05e27
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreePropertySuite.scala
@@ -0,0 +1,475 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.scalacheck.Gen
+import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks
+
+import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkEnv, SparkFunSuite, TaskContext}
+import org.apache.spark.memory.MemoryTestingUtils
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow, MutableProjection, SpecificInternalRow}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{Average, Count, DeclarativeAggregate, Max, Min, StddevPop, StddevSamp, Sum}
+import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection
+import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType}
+
+/**
+ * Property-based tests for [[WindowSegmentTree]] using ScalaCheck. Row-by-row
+ * naive aggregate is the parallel oracle: every property reduces to
+ * `segtree(input) == naive(input)`.
+ * Follows `ExpressionEvalHelper` convention (mixes in
+ * [[ScalaCheckDrivenPropertyChecks]]). All generated cases share one
+ * `SparkContext`/`TaskMemoryManager` by wrapping `forAll` inside a single
+ * `withTaskContext`; do NOT invert.
+ */
+class WindowSegmentTreePropertySuite extends SparkFunSuite
+    with LocalSparkContext with ScalaCheckDrivenPropertyChecks {
+
+  // ScalaCheck config: deterministic seed (ScalaCheck 1.14+ default).
+  // Quick tier: 100 cases for P1, 10 cases for P2 (see class doc).
+  implicit override val generatorDrivenConfig: PropertyCheckConfiguration =
+    PropertyCheckConfiguration(minSuccessful = 100, minSize = 0, sizeRange = 100)
+
+  // test harness (mirrors WindowSegmentTreeSuite)
+
+  private def newMutableProjection
+      : (Seq[Expression], Seq[Attribute]) => MutableProjection =
+    (exprs, attrs) => GenerateMutableProjection.generate(exprs, attrs)
+
+  private def withTaskContext[T](body: => T): T = {
+    sc = new SparkContext("local", "property-based-test", new SparkConf(false))
+    try {
+      val taskContext = MemoryTestingUtils.fakeTaskContext(SparkEnv.get)
+      TaskContext.setTaskContext(taskContext)
+      try body finally TaskContext.unset()
+    } finally {
+      if (sc != null) {
+        sc.stop()
+        sc = null
+      }
+    }
+  }
+
+  // Oracle
+  sealed trait AggKind
+  case object AggMin extends AggKind
+  case object AggMax extends AggKind
+  case object AggSum extends AggKind
+  case object AggCount extends AggKind
+
+  /**
+   * Naive oracle over [lo, hi). Empty frame =&gt; None for Min/Max/Sum
+   * (Spark returns NULL); for Count returns Some(0). null inputs are
+   * skipped (Spark Min/Max/Sum/Count semantics).
+   */
+  private def naiveAgg(
+      values: IndexedSeq[Option[Long]],
+      lo: Int, hi: Int,
+      kind: AggKind): Option[Long] = kind match {
+    case AggMin | AggMax =>
+      if (lo >= hi) None
+      else {
+        var result: Option[Long] = None
+        var i = lo
+        while (i < hi) {
+          values(i) match {
+            case Some(v) =>
+              result = result match {
+                case None => Some(v)
+                case Some(cur) => kind match {
+                  case AggMin => if (v < cur) Some(v) else result
+                  case AggMax => if (v > cur) Some(v) else result
+                  case _ => result
+                }
+              }
+            case None =>
+          }
+          i += 1
+        }
+        result
+      }
+    case AggSum =>
+      // Spark Sum returns NULL when all inputs are null OR frame empty.
+      var sawNonNull = false
+      var s = 0L
+      var i = lo
+      while (i < hi) {
+        values(i) match {
+          case Some(v) => sawNonNull = true; s += v
+          case None =>
+        }
+        i += 1
+      }
+      if (sawNonNull) Some(s) else None
+    case AggCount =>
+      // Spark COUNT(col) over empty frame returns 0 (never null).
+      var c = 0L
+      var i = lo
+      while (i < hi) {
+        if (values(i).isDefined) c += 1
+        i += 1
+      }
+      Some(c)
+  }
+
+  // Input case ADT
+  private case class PbtCase(
+      values: IndexedSeq[Option[Long]],
+      dataType: DataType,   // IntegerType or LongType
+      agg: AggKind,
+      blockSize: Int,
+      fanout: Int) {
+    def n: Int = values.length
+  }
+
+  // Generators
+  /** Weighted N: bias toward small boundaries (0,1,2 never hit under uniform). */
+  private val genN: Gen[Int] = Gen.frequency(
+    (2, Gen.choose(0, 4)),
+    (4, Gen.choose(5, 128)),
+    (4, Gen.choose(129, 5000))
+  )
+
+  /**
+   * 20% null, 80% bounded. For (AggSum, LongType) range shrunk to +/-1e12
+   * so worst-case sum (1e12 x 5000 = 5e15) stays inside Long bounds.
+   */
+  private def genValue(dt: DataType, agg: AggKind): Gen[Option[Long]] = {
+    val bounded: Gen[Long] = (dt, agg) match {
+      case (IntegerType, _) =>
+        Gen.choose(Int.MinValue.toLong, Int.MaxValue.toLong)
+      case (LongType, AggSum) =>
+        Gen.choose(-1000000000000L, 1000000000000L) // +/-1e12
+      case (LongType, _) =>
+        Gen.choose(Long.MinValue, Long.MaxValue)
+      case _ => throw new IllegalArgumentException(s"unsupported: $dt")
+    }
+    Gen.frequency(
+      (1, Gen.const(None: Option[Long])),
+      (4, bounded.map(Some(_)))
+    )
+  }
+
+  private val genBlockSize: Gen[Int] =
+    Gen.oneOf(1, 16, 32, 64, 256, 1024, 4096, 65536)
+
+  private val genFanout: Gen[Int] = Gen.oneOf(2, 3, 4, 8, 16)
+
+  private val genAgg: Gen[AggKind] =
+    Gen.oneOf(AggMin, AggMax, AggSum, AggCount)
+
+  private val genType: Gen[DataType] = Gen.oneOf(IntegerType, LongType)
+
+  private val genCase: Gen[PbtCase] = for {
+    dt <- genType
+    agg <- genAgg
+    n <- genN
+    values <- Gen.listOfN(n, genValue(dt, agg)).map(_.toIndexedSeq)
+    blockSize <- genBlockSize
+    fanout <- genFanout
+  } yield PbtCase(values, dt, agg, blockSize, fanout)
+
+  /** Frame [lo, hi) within [0, n]. */
+  private val genFrame: PbtCase => Gen[(Int, Int)] = { c =>
+    val n = c.n
+    for {
+      a <- Gen.choose(0, n)
+      b <- Gen.choose(0, n)
+    } yield if (a <= b) (a, b) else (b, a)
+  }
+
+  // Tree build/query glue
+  private def buildAttr(dt: DataType): AttributeReference =
+    AttributeReference("v", dt, nullable = true)()
+
+  /** Output type of the aggregate (segtree's queryInto materializes this). */
+  private def aggOutputType(c: PbtCase): DataType = c.agg match {
+    case AggMin | AggMax => c.dataType        // same as input
+    case AggSum | AggCount => LongType        // Sum(Int|Long)->Long; Count->Long
+  }
+
+  private def buildTreeFor(c: PbtCase): (WindowSegmentTree, AttributeReference) = {
+    val attr = buildAttr(c.dataType)
+    val schema: Seq[Attribute] = Seq(attr)
+    val agg: DeclarativeAggregate = c.agg match {
+      case AggMin => Min(attr)
+      case AggMax => Max(attr)
+      case AggSum => Sum(attr)
+      case AggCount => Count(Seq(attr))
+    }
+    val tree = new WindowSegmentTree(
+      Array(agg), schema, newMutableProjection,
+      c.fanout, c.blockSize, maxCachedBlocks = None,
+      taskMemoryManager = TaskContext.get().taskMemoryManager())
+    val rows = c.values.iterator.map { opt =>
+      val r = new GenericInternalRow(1)
+      opt match {
+        case Some(v) => c.dataType match {
+          case IntegerType => r.update(0, v.toInt)
+          case LongType => r.update(0, v)
+          case _ => throw new IllegalStateException
+        }
+        case None => r.setNullAt(0)
+      }
+      r.asInstanceOf[InternalRow]
+    }
+    SegmentTreeWindowTestHelper.buildTreeFromIter(tree, rows, schema)
+    (tree, attr)
+  }
+
+  private def queryResult(
+      tree: WindowSegmentTree, outDt: DataType, lo: Int, hi: Int): Option[Long] = {
+    val out = new SpecificInternalRow(Seq[DataType](outDt))
+    tree.query(lo, hi, out)
+    if (out.isNullAt(0)) None
+    else outDt match {
+      case IntegerType => Some(out.getInt(0).toLong)
+      case LongType => Some(out.getLong(0))
+      case _ => throw new IllegalStateException
+    }
+  }
+
+  // Properties
+  /**
+   * P1 Equivalence: segtree query result equals naive aggregate over random
+   * frames. Probes multiple frames per case (empty / degenerate / full /
+   * random) to amplify block/fanout edge coverage.
+   */
+  test("P1 equivalence: segtree query equals naive aggregate over random frames") {
+    withTaskContext {
+      forAll(genCase) { c =>
+        val (tree, _) = buildTreeFor(c)
+        val outDt = aggOutputType(c)
+        try {
+          val frames = boundaryFrames(c.n) ++ randomFrames(c.n, k = 8, seed = c.n.toLong)
+          frames.foreach { case (lo, hi) =>
+            val expected = naiveAgg(c.values, lo, hi, c.agg)
+            val actual = queryResult(tree, outDt, lo, hi)
+            assert(actual == expected,
+              s"P1 mismatch: n=${c.n} agg=${c.agg} blockSize=${c.blockSize} " +
+                s"fanout=${c.fanout} dt=${c.dataType} frame=[$lo,$hi) " +
+                s"expected=$expected actual=$actual")
+          }
+        } finally tree.close()
+      }
+    }
+  }
+
+  /** Boundary frames: empty, full, head, tail, single-element. */
+  private def boundaryFrames(n: Int): Seq[(Int, Int)] = {
+    if (n == 0) Seq((0, 0))
+    else Seq(
+      (0, 0),            // empty at start
+      (n, n),            // empty at end
+      (0, n),            // full
+      (0, 1),            // single head
+      (n - 1, n),        // single tail
+      (n / 2, n / 2),    // empty middle
+      (0, n / 2),        // prefix
+      (n / 2, n)         // suffix
+    ).distinct
+  }
+
+  /** Deterministic random frames; seed tied to case to aid shrink repro. */
+  private def randomFrames(n: Int, k: Int, seed: Long): Seq[(Int, Int)] = {
+    if (n == 0) return Nil
+    val rnd = new scala.util.Random(seed)
+    Seq.fill(k) {
+      val a = rnd.nextInt(n + 1)
+      val b = rnd.nextInt(n + 1)
+      if (a <= b) (a, b) else (b, a)
+    }
+  }
+
+  /**
+   * P2 Determinism (10-case smoke): rebuild + requery yields identical
+   * results. Segtree has no RNG/hash iteration; acts as low-cost regression
+   * sentinel against future non-deterministic refactors.
+   */
+  test("P2 determinism: rebuild + requery yields identical results") {
+    withTaskContext {
+      forAll(genCase, minSuccessful(10)) { c =>
+        val frames = boundaryFrames(c.n) ++ randomFrames(c.n, k = 2, seed = 0xD2L)
+        val run1 = runFrames(c, frames)
+        val run2 = runFrames(c, frames)
+        assert(run1 == run2,
+          s"P2 mismatch: n=${c.n} agg=${c.agg} blockSize=${c.blockSize} " +
+            s"fanout=${c.fanout} dt=${c.dataType} run1=$run1 run2=$run2")
+      }
+    }
+  }
+
+  private def runFrames(c: PbtCase, frames: Seq[(Int, Int)]): Seq[Option[Long]] = {
+    val (tree, _) = buildTreeFor(c)
+    val outDt = aggOutputType(c)
+    try {
+      frames.map { case (lo, hi) => queryResult(tree, outDt, lo, hi) }
+    } finally tree.close()
+  }
+
+  // Floating-point family (Avg / StddevSamp / StddevPop).
+  // Welford merge order in segtree differs from row-by-row SlidingWindowFrame,
+  // so equality is NOT bit-exact; compared with relative tol 1e-9 + absolute
+  // floor for near-zero (e.g. constant-input stddev). Kept separate to avoid
+  // muddying the integer-family exact-equality contract.
+
+  sealed trait FpAggKind
+  case object FpAvg extends FpAggKind
+  case object FpStddevSamp extends FpAggKind
+  case object FpStddevPop extends FpAggKind
+
+  private case class FpCase(
+      values: IndexedSeq[Option[Double]],
+      agg: FpAggKind,
+      blockSize: Int,
+      fanout: Int) {
+    def n: Int = values.length
+  }
+
+  /**
+   * 20% null, 80% bounded gaussian-ish double. Range chosen so partial
+   * sums stay well within Double precision over n <= 5000.
+   */
+  private val genFpValue: Gen[Option[Double]] = {
+    val bounded: Gen[Double] = Gen.choose(-1e6, 1e6)
+    Gen.frequency(
+      (1, Gen.const(None: Option[Double])),
+      (4, bounded.map(Some(_)))
+    )
+  }
+
+  private val genFpAgg: Gen[FpAggKind] =
+    Gen.oneOf(FpAvg, FpStddevSamp, FpStddevPop)
+
+  private val genFpCase: Gen[FpCase] = for {
+    agg <- genFpAgg
+    n <- genN
+    values <- Gen.listOfN(n, genFpValue).map(_.toIndexedSeq)
+    blockSize <- genBlockSize
+    fanout <- genFanout
+  } yield FpCase(values, agg, blockSize, fanout)
+
+  /**
+   * Naive oracle for FP aggregates over [lo, hi). Returns None when Spark
+   * would emit NULL: empty frame, all-null frame, and (for StddevSamp)
+   * count &lt; 2.
+   */
+  private def naiveFpAgg(
+      values: IndexedSeq[Option[Double]],
+      lo: Int, hi: Int,
+      kind: FpAggKind): Option[Double] = {
+    val nonNull = (lo until hi).flatMap(values).toArray
+    val n = nonNull.length
+    if (n == 0) return None
+    val mean = nonNull.sum / n
+    kind match {
+      case FpAvg => Some(mean)
+      case FpStddevSamp =>
+        if (n < 2) None
+        else {
+          var sq = 0.0
+          var i = 0
+          while (i < n) { val d = nonNull(i) - mean; sq += d * d; i += 1 }
+          Some(math.sqrt(sq / (n - 1)))
+        }
+      case FpStddevPop =>
+        var sq = 0.0
+        var i = 0
+        while (i < n) { val d = nonNull(i) - mean; sq += d * d; i += 1 }
+        Some(math.sqrt(sq / n))
+    }
+  }
+
+  /** Build tree + matching AggregateProcessor so we can `queryInto` to evaluate. */
+  private def buildFpTreeFor(
+      c: FpCase): (WindowSegmentTree, AggregateProcessor) = {
+    val attr = AttributeReference("v", DoubleType, nullable = true)()
+    val schema: Seq[Attribute] = Seq(attr)
+    val agg: DeclarativeAggregate = c.agg match {
+      case FpAvg => Average(attr)
+      case FpStddevSamp => StddevSamp(attr)
+      case FpStddevPop => StddevPop(attr)
+    }
+    val tree = new WindowSegmentTree(
+      Array(agg), schema, newMutableProjection,
+      c.fanout, c.blockSize, maxCachedBlocks = None,
+      taskMemoryManager = TaskContext.get().taskMemoryManager())
+    val rows = c.values.iterator.map { opt =>
+      val r = new GenericInternalRow(1)
+      opt match {
+        case Some(v) => r.update(0, v)
+        case None => r.setNullAt(0)
+      }
+      r.asInstanceOf[InternalRow]
+    }
+    SegmentTreeWindowTestHelper.buildTreeFromIter(tree, rows, schema)
+    val processor = AggregateProcessor(
+      Array[Expression](agg),
+      ordinal = 0,
+      inputAttributes = schema,
+      newMutableProjection = newMutableProjection,
+      filters = Array[Option[Expression]](None))
+    (tree, processor)
+  }
+
+  private def queryFpResult(
+      tree: WindowSegmentTree,
+      processor: AggregateProcessor,
+      lo: Int, hi: Int): Option[Double] = {
+    val out = new SpecificInternalRow(Seq[DataType](DoubleType))
+    tree.queryInto(lo, hi, processor, out)
+    if (out.isNullAt(0)) None else Some(out.getDouble(0))
+  }
+
+  private def fpClose(actual: Option[Double], expected: Option[Double]): Boolean =
+    (actual, expected) match {
+      case (None, None) => true
+      case (Some(a), Some(e)) =>
+        if (java.lang.Double.isNaN(a) && java.lang.Double.isNaN(e)) true
+        else {
+          val absDiff = math.abs(a - e)
+          val denom = math.max(math.abs(e), 1.0)  // absolute floor for near-zero
+          absDiff / denom < 1e-9
+        }
+      case _ => false
+    }
+
+  /**
+   * P4 Equivalence (FP family): segtree avg/stddev within 1e-9 relative
+   * tolerance of naive oracle. Smaller minSuccessful (50) than P1 because
+   * Welford-merge is heavier than Min/Max/Sum/Count.
+   */
+  test("P4 fp equivalence: segtree avg/stddev within 1e-9 of naive oracle") {
+    withTaskContext {
+      forAll(genFpCase, minSuccessful(50)) { c =>
+        val (tree, processor) = buildFpTreeFor(c)
+        try {
+          val frames = boundaryFrames(c.n) ++ randomFrames(c.n, k = 6, seed = c.n.toLong)
+          frames.foreach { case (lo, hi) =>
+            val expected = naiveFpAgg(c.values, lo, hi, c.agg)
+            val actual = queryFpResult(tree, processor, lo, hi)
+            assert(fpClose(actual, expected),
+              s"P4 fp mismatch: n=${c.n} agg=${c.agg} blockSize=${c.blockSize} " +
+                s"fanout=${c.fanout} frame=[$lo,$hi) " +
+                s"expected=$expected actual=$actual")
+          }
+        } finally tree.close()
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeSuite.scala
new file mode 100644
index 0000000000000..443d160394fac
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/window/WindowSegmentTreeSuite.scala
@@ -0,0 +1,521 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import scala.util.Random
+
+import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkEnv, SparkException, SparkFunSuite, TaskContext}
+import org.apache.spark.memory.MemoryTestingUtils
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow, MutableProjection, SpecificInternalRow}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{DeclarativeAggregate, Max, Min, StddevSamp, Sum}
+import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection
+import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType}
+
+class WindowSegmentTreeSuite extends SparkFunSuite with LocalSparkContext {
+
+  // test harness
+  private val inputAttr: AttributeReference =
+    AttributeReference("v", IntegerType, nullable = true)()
+  private val inputSchema: Seq[Attribute] = Seq(inputAttr)
+
+  private def newMutableProjection
+      : (Seq[Expression], Seq[Attribute]) => MutableProjection =
+    (exprs, attrs) => GenerateMutableProjection.generate(exprs, attrs)
+
+  private def minAgg: DeclarativeAggregate = Min(inputAttr)
+
+  private def withTaskContext[T](body: => T): T = {
+    sc = new SparkContext("local", "test", new SparkConf(false))
+    val taskContext = MemoryTestingUtils.fakeTaskContext(SparkEnv.get)
+    TaskContext.setTaskContext(taskContext)
+    try body finally {
+      TaskContext.unset()
+    }
+  }
+
+  private def buildTree(
+      values: Seq[Int],
+      aggs: Array[DeclarativeAggregate] = Array(minAgg),
+      fanout: Int = WindowSegmentTree.DefaultFanout,
+      blockSize: Int = WindowSegmentTree.DefaultBlockSize,
+      maxCachedBlocks: Option[Int] = None): WindowSegmentTree = {
+    val tree = new WindowSegmentTree(
+      aggs, inputSchema, newMutableProjection, fanout, blockSize, maxCachedBlocks,
+      taskMemoryManager = TaskContext.get().taskMemoryManager())
+    val rows = values.iterator.map { v =>
+      val r = new GenericInternalRow(1)
+      r.update(0, v)
+      r.asInstanceOf[InternalRow]
+    }
+    SegmentTreeWindowTestHelper.buildTreeFromIter(tree, rows, inputSchema)
+    tree
+  }
+
+  /** Naive oracle: MIN over values[lo, hi). Returns Int box or null. */
+  private def naiveMin(values: Seq[Int], lo: Int, hi: Int): Any = {
+    if (lo >= hi) null
+    else {
+      var m = values(lo)
+      var i = lo + 1
+      while (i < hi) { if (values(i) < m) m = values(i); i += 1 }
+      m.asInstanceOf[java.lang.Integer]
+    }
+  }
+
+  private def newOutBuffer(): SpecificInternalRow =
+    new SpecificInternalRow(Seq[org.apache.spark.sql.types.DataType](IntegerType))
+
+  private def queryMin(tree: WindowSegmentTree, lo: Int, hi: Int): Any = {
+    val out = newOutBuffer()
+    tree.query(lo, hi, out)
+    if (out.isNullAt(0)) null else out.getInt(0)
+  }
+
+  test("build and single-point query returns identity; full scan matches naive") {
+    withTaskContext {
+      val values = Seq(5, 2, 9, 1, 7, 3, 4, 8, 6, 0)
+      val tree = buildTree(values, fanout = 4, blockSize = 1024)
+      try {
+        // single-point queries -> identity (null for MIN)
+        for (i <- 0 to values.length) {
+          assert(queryMin(tree, i, i) == null, s"identity at i=$i")
+        }
+        // full scan
+        assert(queryMin(tree, 0, values.length) == naiveMin(values, 0, values.length))
+      } finally tree.close()
+    }
+  }
+
+  test("single-block: range query matches naive baseline for random ranges") {
+    withTaskContext {
+      val rnd = new Random(0xC0FFEE)
+      val values = Seq.fill(100)(rnd.nextInt(1000))
+      val tree = buildTree(values, fanout = 8, blockSize = 1024)
+      try {
+        for (_ <- 0 until 20) {
+          val a = rnd.nextInt(values.length + 1)
+          val b = rnd.nextInt(values.length + 1)
+          val (lo, hi) = (math.min(a, b), math.max(a, b))
+          assert(queryMin(tree, lo, hi) == naiveMin(values, lo, hi),
+            s"mismatch at [$lo, $hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  test("fanout boundaries: sizes {1, F, F+1, F*F} for fanout in {2,4,8,16}") {
+    withTaskContext {
+      val rnd = new Random(42)
+      for (fanout <- Seq(2, 4, 8, 16)) {
+        val sizes = Seq(1, fanout, fanout + 1, fanout * fanout).distinct
+        for (n <- sizes) {
+          val values = Seq.fill(n)(rnd.nextInt(10000))
+          val tree = buildTree(values, fanout = fanout, blockSize = 1024 * 1024)
+          try {
+            // Exhaustive ranges for small n.
+            for (lo <- 0 to n; hi <- lo to n) {
+              assert(queryMin(tree, lo, hi) == naiveMin(values, lo, hi),
+                s"fanout=$fanout n=$n [$lo,$hi)")
+            }
+            // Level-size invariant: ceil(prev / fanout)
+            if (n > 0) {
+              val numLevels = tree.peekLevelCount(0)
+              var prev = n
+              for (l <- 1 until numLevels) {
+                val expected = math.ceil(prev.toDouble / fanout).toInt
+                val actual = tree.peekLevelSize(0, l)
+                assert(actual == expected,
+                  s"level-size wrong at fanout=$fanout n=$n level=$l: $actual vs $expected")
+                prev = expected
+              }
+            }
+          } finally tree.close()
+        }
+      }
+    }
+  }
+
+  test("identity at empty range query(k, k)") {
+    withTaskContext {
+      val values = (1 to 50).reverse
+      val tree = buildTree(values, fanout = 4, blockSize = 16)
+      try {
+        for (k <- Seq(0, values.length / 2, values.length)) {
+          assert(queryMin(tree, k, k) == null, s"identity at k=$k")
+        }
+      } finally tree.close()
+    }
+  }
+
+  test("block boundary correctness: cross-block vs single-block baseline") {
+    withTaskContext {
+      val rnd = new Random(123)
+      val values = Seq.fill(100)(rnd.nextInt(10000))
+      val treeBlocked = buildTree(values, fanout = 4, blockSize = 16)
+      val treeBaseline = buildTree(values, fanout = 4, blockSize = 1024)
+      try {
+        assert(treeBlocked.peekBlockCount == (100 + 16 - 1) / 16)
+        for (lo <- 0 to 100; hi <- lo to 100) {
+          assert(queryMin(treeBlocked, lo, hi) == queryMin(treeBaseline, lo, hi),
+            s"cross-block mismatch at [$lo, $hi)")
+        }
+      } finally {
+        treeBlocked.close()
+        treeBaseline.close()
+      }
+    }
+  }
+
+  test("LRU stability: same queries in different orders produce same results") {
+    withTaskContext {
+      val rnd = new Random(777)
+      val values = Seq.fill(100)(rnd.nextInt(10000))
+      val tree = buildTree(
+        values, fanout = 4, blockSize = 16, maxCachedBlocks = Some(2))
+      try {
+        val queries = Seq.fill(30) {
+          val a = rnd.nextInt(101)
+          val b = rnd.nextInt(101)
+          (math.min(a, b), math.max(a, b))
+        }
+        val results1 = queries.map { case (lo, hi) => queryMin(tree, lo, hi) }
+        val reordered = rnd.shuffle(queries.zipWithIndex)
+        val results2 = Array.fill[Any](queries.length)(null)
+        reordered.foreach { case ((lo, hi), idx) =>
+          results2(idx) = queryMin(tree, lo, hi)
+        }
+        for (i <- queries.indices) {
+          assert(results1(i) == results2(i),
+            s"LRU instability at query $i ${queries(i)}: ${results1(i)} vs ${results2(i)}")
+        }
+        // Also cross-check against naive oracle.
+        for (i <- queries.indices) {
+          val (lo, hi) = queries(i)
+          assert(results1(i) == naiveMin(values, lo, hi))
+        }
+      } finally tree.close()
+    }
+  }
+
+  test("cross-block: range query matches naive baseline for random ranges") {
+    withTaskContext {
+      val rnd = new Random(0xBEEF)
+      val values = Seq.fill(100)(rnd.nextInt(1000))
+      val tree = buildTree(values, fanout = 4, blockSize = 8)
+      try {
+        assert(tree.peekBlockCount == (100 + 8 - 1) / 8)
+        for (_ <- 0 until 50) {
+          val a = rnd.nextInt(values.length + 1)
+          val b = rnd.nextInt(values.length + 1)
+          val (lo, hi) = (math.min(a, b), math.max(a, b))
+          assert(queryMin(tree, lo, hi) == naiveMin(values, lo, hi),
+            s"mismatch at [$lo, $hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  test("cross-block: multi-block level-size invariant") {
+    withTaskContext {
+      val rnd = new Random(31337)
+      val fanout = 4
+      val blockSize = 8
+      val numRows = 50 // > blockSize -> multiple blocks
+      val values = Seq.fill(numRows)(rnd.nextInt(10000))
+      val tree = buildTree(values, fanout = fanout, blockSize = blockSize)
+      try {
+        val numBlocks = tree.peekBlockCount
+        assert(numBlocks > 1, s"expected >1 block, got $numBlocks")
+        for (b <- 0 until numBlocks) {
+          val blockStart = b * blockSize
+          val blockRows = math.min(blockSize, numRows - blockStart)
+          val numLevels = tree.peekLevelCount(b)
+          var prev = blockRows
+          for (l <- 1 until numLevels) {
+            val expected = math.ceil(prev.toDouble / fanout).toInt
+            val actual = tree.peekLevelSize(b, l)
+            assert(actual == expected,
+              s"block=$b level=$l: $actual vs $expected")
+            prev = expected
+          }
+        }
+        // Correctness cross-check.
+        for (lo <- 0 to numRows; hi <- lo to numRows) {
+          assert(queryMin(tree, lo, hi) == naiveMin(values, lo, hi),
+            s"cross-block MIN mismatch at [$lo, $hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  // D8 multi-aggregate
+  test("D8 multi-aggregate: MIN + MAX + SUM on the same tree") {
+    withTaskContext {
+      val rnd = new Random(2024)
+      val numRows = 50
+      val values = Seq.fill(numRows)(rnd.nextInt(1000))
+      val aggs: Array[DeclarativeAggregate] =
+        Array(Min(inputAttr), Max(inputAttr), Sum(inputAttr))
+      val tree = buildTree(values, aggs = aggs, fanout = 4, blockSize = 8)
+      // Output schema: MIN(int), MAX(int), SUM(long).
+      // Sum on IntegerType widens the buffer slot to LongType.
+      val outTypes: Seq[DataType] = Seq(IntegerType, IntegerType, LongType)
+      def queryAll(lo: Int, hi: Int): (Any, Any, Any) = {
+        val out = new SpecificInternalRow(outTypes)
+        tree.query(lo, hi, out)
+        val mn = if (out.isNullAt(0)) null else out.getInt(0)
+        val mx = if (out.isNullAt(1)) null else out.getInt(1)
+        val sm = if (out.isNullAt(2)) null else out.getLong(2)
+        (mn, mx, sm)
+      }
+      def naiveMax(vs: Seq[Int], lo: Int, hi: Int): Any =
+        if (lo >= hi) null else vs.slice(lo, hi).max
+      def naiveSum(vs: Seq[Int], lo: Int, hi: Int): Any =
+        if (lo >= hi) null else vs.slice(lo, hi).map(_.toLong).sum
+      try {
+        for (_ <- 0 until 20) {
+          val a = rnd.nextInt(numRows + 1)
+          val b = rnd.nextInt(numRows + 1)
+          val (lo, hi) = (math.min(a, b), math.max(a, b))
+          val (mn, mx, sm) = queryAll(lo, hi)
+          assert(mn == naiveMin(values, lo, hi), s"MIN at [$lo,$hi)")
+          assert(mx == naiveMax(values, lo, hi), s"MAX at [$lo,$hi)")
+          assert(sm == naiveSum(values, lo, hi), s"SUM at [$lo,$hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  // D9 spill coverage
+  test("D9 spill path: low thresholds still produce correct range aggregates") {
+    withTaskContext {
+      val rnd = new Random(909)
+      val numRows = 60
+      val values = Seq.fill(numRows)(rnd.nextInt(1000))
+      val tree = new WindowSegmentTree(
+        Array(minAgg), inputSchema, newMutableProjection,
+        fanout = 4, blockSize = 8, maxCachedBlocks = Some(2),
+        taskMemoryManager = TaskContext.get().taskMemoryManager())
+      val rows = values.iterator.map { v =>
+        val r = new GenericInternalRow(1); r.update(0, v); r.asInstanceOf[InternalRow]
+      }
+      SegmentTreeWindowTestHelper.buildTreeFromIter(
+        tree, rows, inputSchema, inMemoryThreshold = 4, spillThreshold = 8)
+      try {
+        for (_ <- 0 until 40) {
+          val a = rnd.nextInt(numRows + 1)
+          val b = rnd.nextInt(numRows + 1)
+          val (lo, hi) = (math.min(a, b), math.max(a, b))
+          assert(queryMin(tree, lo, hi) == naiveMin(values, lo, hi),
+            s"spill-path mismatch at [$lo, $hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  test("D10 rebuild: second build replaces state; failed build preserves prior state") {
+    withTaskContext {
+      val v1 = Seq(5, 1, 9, 3, 7, 2, 8, 4, 6, 0)
+      val v2 = Seq(100, 200, 50, 400, 25, 600, 12, 800)
+      val v3 = Seq(10, 20, 30, 40, 50, 60, 70, 80, 90, 5)
+      val tree = new WindowSegmentTree(
+        Array(minAgg), inputSchema, newMutableProjection,
+        fanout = 4, blockSize = 4,
+        taskMemoryManager = TaskContext.get().taskMemoryManager())
+
+      def iterOf(vs: Seq[Int]): Iterator[InternalRow] = vs.iterator.map { v =>
+        val r = new GenericInternalRow(1); r.update(0, v); r.asInstanceOf[InternalRow]
+      }
+
+      try {
+        SegmentTreeWindowTestHelper.buildTreeFromIter(tree, iterOf(v1), inputSchema)
+        assert(queryMin(tree, 0, v1.length) == v1.min)
+        SegmentTreeWindowTestHelper.buildTreeFromIter(tree, iterOf(v2), inputSchema)
+        assert(tree.size == v2.length)
+        for (lo <- 0 to v2.length; hi <- lo to v2.length) {
+          assert(queryMin(tree, lo, hi) == naiveMin(v2, lo, hi),
+            s"post-rebuild mismatch at [$lo, $hi)")
+        }
+
+        // Now simulate a failing build midway through; prior state must remain queryable.
+        val boomIter: Iterator[InternalRow] = new Iterator[InternalRow] {
+          private var emitted = 0
+          override def hasNext: Boolean = true
+          override def next(): InternalRow = {
+            if (emitted >= 3) throw new RuntimeException("boom")
+            emitted += 1
+            val r = new GenericInternalRow(1); r.update(0, -1); r.asInstanceOf[InternalRow]
+          }
+        }
+        intercept[RuntimeException](
+          SegmentTreeWindowTestHelper.buildTreeFromIter(tree, boomIter, inputSchema))
+        // Prior v2 state intact.
+        assert(tree.size == v2.length)
+        assert(queryMin(tree, 0, v2.length) == v2.min)
+
+        // Build v3 successfully after the failure.
+        SegmentTreeWindowTestHelper.buildTreeFromIter(tree, iterOf(v3), inputSchema)
+        assert(tree.size == v3.length)
+        for (lo <- 0 to v3.length; hi <- lo to v3.length) {
+          assert(queryMin(tree, lo, hi) == naiveMin(v3, lo, hi),
+            s"post-recovery mismatch at [$lo, $hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  test("D11 error paths: invalid ctor args and invalid query ranges") {
+    withTaskContext {
+      // Constructor validation.
+      intercept[IllegalArgumentException] {
+        new WindowSegmentTree(Array(minAgg), inputSchema, newMutableProjection, fanout = 1)
+      }
+      intercept[IllegalArgumentException] {
+        new WindowSegmentTree(Array(minAgg), inputSchema, newMutableProjection, blockSize = 0)
+      }
+      intercept[IllegalArgumentException] {
+        new WindowSegmentTree(
+          Array(minAgg), inputSchema, newMutableProjection, maxCachedBlocks = Some(0))
+      }
+      intercept[IllegalArgumentException] {
+        new WindowSegmentTree(
+          Array(minAgg), inputSchema, newMutableProjection, maxCachedBlocks = Some(-1))
+      }
+
+      // Query range validation.
+      val values = Seq(3, 1, 4, 1, 5, 9, 2, 6, 5, 3)
+      val tree = buildTree(values, fanout = 4, blockSize = 4)
+      try {
+        // Pre-fill outBuffer with a sentinel, then assert bounds check leaves it alone.
+        def sentinelBuf(): SpecificInternalRow = {
+          val b = newOutBuffer()
+          b.setInt(0, 0x5EEDED)
+          b
+        }
+        val sz = tree.size
+        for ((lo, hi) <- Seq((-1, 5), (0, sz + 1), (5, 3))) {
+          val out = sentinelBuf()
+          val ex = intercept[SparkException](tree.query(lo, hi, out))
+          assert(ex.getCondition == "INTERNAL_ERROR")
+          assert(!out.isNullAt(0) && out.getInt(0) == 0x5EEDED,
+            s"outBuffer mutated by invalid query [$lo,$hi)")
+        }
+      } finally tree.close()
+    }
+  }
+
+  // STDDEV_SAMP regression: minimal repro for the digest mismatch in GHA run
+  // 24599916378 (WindowBenchmark Section A). MIN/MAX/SUM/COUNT/AVG pass
+  // digest parity; STDDEV_SAMP is the first multi-buffer agg combining
+  // CentralMomentAgg's Welford merge (n, avg, m2) with a non-trivial
+  // `evaluateExpression`. Exercises both merge (`query`) and evaluate
+  // (`queryInto` + `AggregateProcessor`) paths.
+  test("STDDEV_SAMP: segtree matches naive oracle on random doubles, W=21") {
+    withTaskContext {
+      val rnd = new Random(0x57DDEFL) // fixed seed
+      val n = 100
+      val values: Array[Double] = Array.fill(n)(rnd.nextGaussian() * 1000.0 + 50.0)
+      val doubleAttr = AttributeReference("v", DoubleType, nullable = true)()
+      val schema: Seq[Attribute] = Seq(doubleAttr)
+      val agg: DeclarativeAggregate = StddevSamp(doubleAttr)
+
+      // Use a block size that forces cross-block merges at W=21: blockSize=8.
+      val tree = new WindowSegmentTree(
+        Array(agg), schema, newMutableProjection,
+        fanout = 4, blockSize = 8, maxCachedBlocks = None,
+        taskMemoryManager = TaskContext.get().taskMemoryManager())
+      try {
+        val rows = values.iterator.map { v =>
+          val r = new GenericInternalRow(1)
+          r.update(0, v)
+          r.asInstanceOf[InternalRow]
+        }
+        SegmentTreeWindowTestHelper.buildTreeFromIter(tree, rows, schema)
+
+        // AggregateProcessor identical to the Frame's, so `queryInto`
+        // exercises evaluateExpression (sqrt(m2/(n-1)) with n=1 / div-by-0
+        // guards in StddevSamp).
+        val processor = AggregateProcessor(
+          Array[Expression](agg),
+          ordinal = 0,
+          inputAttributes = schema,
+          newMutableProjection = newMutableProjection,
+          filters = Array[Option[Expression]](None))
+
+        val out = new SpecificInternalRow(Seq[DataType](DoubleType))
+
+        val halfW = 10 // W=21
+        var i = 0
+        var maxRelErr = 0.0
+        while (i < n) {
+          val lo = math.max(0, i - halfW)
+          val hi = math.min(n, i + halfW + 1)
+          tree.queryInto(lo, hi, processor, out)
+          val actual = if (out.isNullAt(0)) Double.NaN else out.getDouble(0)
+          val expected = naiveStddevSamp(values, lo, hi)
+          // NaN handling: legacy `nullOnDivideByZero=true` -> null when n<=1.
+          if (java.lang.Double.isNaN(expected)) {
+            assert(out.isNullAt(0),
+              s"expected null at i=$i [$lo,$hi), got $actual")
+          } else {
+            assert(!out.isNullAt(0),
+              s"expected $expected at i=$i [$lo,$hi), got null")
+            val denom = math.max(math.abs(expected), 1e-12)
+            val rel = math.abs(actual - expected) / denom
+            if (rel > maxRelErr) maxRelErr = rel
+            assert(rel < 1e-9,
+              s"STDDEV_SAMP mismatch at i=$i [$lo,$hi): " +
+                s"expected=$expected actual=$actual relErr=$rel")
+          }
+          i += 1
+        }
+      } finally tree.close()
+    }
+  }
+
+  /** Naive STDDEV_SAMP over [lo, hi); returns NaN for n < 2 to signal null. */
+  private def naiveStddevSamp(values: Array[Double], lo: Int, hi: Int): Double = {
+    val n = hi - lo
+    if (n < 2) return Double.NaN
+    var sum = 0.0
+    var i = lo
+    while (i < hi) { sum += values(i); i += 1 }
+    val mean = sum / n
+    var sq = 0.0
+    i = lo
+    while (i < hi) { val d = values(i) - mean; sq += d * d; i += 1 }
+    math.sqrt(sq / (n - 1))
+  }
+
+  test("D12 block-aligned cross-block boundaries") {
+    withTaskContext {
+      val rnd = new Random(12)
+      val numRows = 50
+      val blockSize = 10
+      val values = Seq.fill(numRows)(rnd.nextInt(10000))
+      val tree = buildTree(values, fanout = 4, blockSize = blockSize)
+      try {
+        for ((lo, hi) <- Seq((0, 20), (10, 40), (20, 50), (0, 50))) {
+          assert(queryMin(tree, lo, hi) == naiveMin(values, lo, hi),
+            s"aligned mismatch at [$lo,$hi)")
+        }
+      } finally tree.close()
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index f79e639216277..ea730abf67d66 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -35,6 +35,8 @@ import org.apache.spark.sql.catalyst.{analysis, TableIdentifier}
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, DateTimeTestUtils}
+import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, FieldReference}
+import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, Predicate}
 import org.apache.spark.sql.execution.{DataSourceScanExec, ExtendedMode, ProjectExec}
 import org.apache.spark.sql.execution.command.{ExplainCommand, ShowCreateTableCommand}
 import org.apache.spark.sql.execution.datasources.{LogicalRelation, LogicalRelationWithTable}
@@ -892,6 +894,55 @@ class JDBCSuite extends SharedSparkSession {
     assert(doCompileFilter(EqualTo("col0.nested", 3)).isEmpty)
   }
 
+  test("SPARK-53454: AlwaysTrue/AlwaysFalse compile to portable SQL in JDBCSQLBuilder") {
+    val dialect = JdbcDialects.get("jdbc:")
+    assert(dialect.compileExpression(new AlwaysTrue).get === "(1 = 1)")
+    assert(dialect.compileExpression(new AlwaysFalse).get === "(1 = 0)")
+
+    // The result must stay valid when AlwaysTrue/AlwaysFalse is nested as an operand
+    // of a larger expression, not just as a standalone WHERE predicate. Without the
+    // surrounding parentheses the bare `1 = 1` would inline into invalid SQL such as
+    // `a = 1 = 1`.
+    val ref = FieldReference("a")
+    val eqTrue = new Predicate("=", Array[V2Expression](ref, new AlwaysTrue))
+    val eqFalse = new Predicate("=", Array[V2Expression](ref, new AlwaysFalse))
+    assert(dialect.compileExpression(eqTrue).get === "\"a\" = (1 = 1)")
+    assert(dialect.compileExpression(eqFalse).get === "\"a\" = (1 = 0)")
+  }
+
+  test("SPARK-57332: escape backslash in LIKE pattern for STARTS_WITH/ENDS_WITH/CONTAINS") {
+    // Default dialect: standard SQL string literals take backslash verbatim, so the LIKE escape
+    // character `\` appears once in the ESCAPE clause and a literal backslash in the value is
+    // doubled once (by escapeSpecialCharsForLikePattern) to be matched literally.
+    val defaultDialect = JdbcDialects.get("jdbc:")
+    def defaultSQL(f: Filter): String = defaultDialect.compileExpression(f.toV2).getOrElse("")
+    // "c" LIKE 'ab\\%' ESCAPE '\'
+    assert(defaultSQL(StringStartsWith("c", "ab\\")) === """"c" LIKE 'ab\\%' ESCAPE '\'""")
+    // "c" LIKE '%\\ab' ESCAPE '\'
+    assert(defaultSQL(StringEndsWith("c", "\\ab")) === """"c" LIKE '%\\ab' ESCAPE '\'""")
+    // "c" LIKE '%a\\b%' ESCAPE '\'
+    assert(defaultSQL(StringContains("c", "a\\b")) === """"c" LIKE '%a\\b%' ESCAPE '\'""")
+
+    // MySQL treats backslash as an escape character inside string literals, so every backslash is
+    // doubled again: the ESCAPE clause uses `\\` and a literal backslash in the value becomes four
+    // backslashes (escapeSpecialCharsForLikePattern doubles it, then
+    // escapeStringLiteralForLikePattern doubles each of those). The wildcard escaping for
+    // `%`/`_` is unchanged from the default.
+    val mySQLDialect = JdbcDialects.get("jdbc:mysql://127.0.0.1/db")
+    def mySQLSQL(f: Filter): String = mySQLDialect.compileExpression(f.toV2).getOrElse("")
+    // `c` LIKE 'ab\\\\%' ESCAPE '\\'
+    assert(mySQLSQL(StringStartsWith("c", "ab\\")) === """`c` LIKE 'ab\\\\%' ESCAPE '\\'""")
+    // `c` LIKE '%\\\\ab' ESCAPE '\\'
+    assert(mySQLSQL(StringEndsWith("c", "\\ab")) === """`c` LIKE '%\\\\ab' ESCAPE '\\'""")
+    // `c` LIKE '%a\\\\b%' ESCAPE '\\'
+    assert(mySQLSQL(StringContains("c", "a\\b")) === """`c` LIKE '%a\\\\b%' ESCAPE '\\'""")
+    // Wildcards stay escaped: the `\` that escapeSpecialCharsForLikePattern puts before `%`/`_` is
+    // itself doubled for MySQL's string-literal layer, so it parses back to `\%`/`\_` (literal
+    // wildcards) before the LIKE engine, matching the default dialect's semantics.
+    // `c` LIKE 'a\\%b\\_%' ESCAPE '\\'
+    assert(mySQLSQL(StringStartsWith("c", "a%b_")) === """`c` LIKE 'a\\%b\\_%' ESCAPE '\\'""")
+  }
+
   test("Dialect unregister") {
     JdbcDialects.unregisterDialect(H2Dialect())
     try {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
index 95a0afa4d1506..ba6945c7b1fda 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
@@ -214,6 +214,8 @@ class JDBCV2Suite extends SharedSparkSession with ExplainSuiteHelper {
       batchStmt.addBatch("INSERT INTO \"test\".\"address\" VALUES ('abc%_def@gmail.com')")
       batchStmt.addBatch("INSERT INTO \"test\".\"address\" VALUES ('abc_%def@gmail.com')")
       batchStmt.addBatch("INSERT INTO \"test\".\"address\" VALUES ('abc_''%def@gmail.com')")
+      batchStmt.addBatch(
+        "INSERT INTO \"test\".\"address\" VALUES ('abc\\def@gmail.com')")
 
       batchStmt.addBatch("CREATE TABLE \"test\".\"employee_bonus\" " +
         "(name TEXT(32), salary NUMERIC(20, 2), bonus DOUBLE, factor DOUBLE)")
@@ -1413,6 +1415,25 @@ class JDBCV2Suite extends SharedSparkSession with ExplainSuiteHelper {
     checkPushedInfo(df15,
       raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_''\%d%' ESCAPE '\']")
     checkAnswer(df15, Seq(Row("abc_'%def@gmail.com")))
+
+    // Backslash in the value must be escaped since '\' is the LIKE escape character
+    val df16 = spark.table("h2.test.address").filter($"email".startsWith("abc\\"))
+    checkFiltersRemoved(df16)
+    checkPushedInfo(df16,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\\%' ESCAPE '\']")
+    checkAnswer(df16, Seq(Row("abc\\def@gmail.com")))
+
+    val df17 = spark.table("h2.test.address").filter($"email".endsWith("\\def@gmail.com"))
+    checkFiltersRemoved(df17)
+    checkPushedInfo(df17,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\\def@gmail.com' ESCAPE '\']")
+    checkAnswer(df17, Seq(Row("abc\\def@gmail.com")))
+
+    val df18 = spark.table("h2.test.address").filter($"email".contains("c\\d"))
+    checkFiltersRemoved(df18)
+    checkPushedInfo(df18,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\\d%' ESCAPE '\']")
+    checkAnswer(df18, Seq(Row("abc\\def@gmail.com")))
   }
 
   test("scan with filter push-down with ansi mode") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
index a36570467a9df..da697847874dc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
@@ -21,8 +21,10 @@ import org.apache.spark.SparkConf
 import org.apache.spark.sql.{AnalysisException, Row}
 import org.apache.spark.sql.catalyst.plans.logical.CompoundBody
 import org.apache.spark.sql.catalyst.util.QuotingUtils.toSQLConf
+import org.apache.spark.sql.connector.catalog.{Aborted, Committed, Identifier, InMemoryRowLevelOperationTableCatalog, Txn, TxnTable, TxnTableCatalog}
 import org.apache.spark.sql.exceptions.SqlScriptingException
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.sources
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
 
@@ -47,6 +49,27 @@ class SqlScriptingE2eSuite extends SharedSparkSession {
   }
 
   // Helpers
+  private def withCatalog(
+      name: String)(
+      f: InMemoryRowLevelOperationTableCatalog => Unit): Unit = {
+    withSQLConf(s"spark.sql.catalog.$name" ->
+        classOf[InMemoryRowLevelOperationTableCatalog].getName) {
+      val catalog = spark.sessionState.catalogManager
+        .catalog(name)
+        .asInstanceOf[InMemoryRowLevelOperationTableCatalog]
+      try f(catalog) finally spark.sessionState.catalogManager.reset()
+    }
+  }
+
+  private def loadTxnTable(
+      txn: Txn,
+      tableName: String,
+      namespace: Array[String] = Array("ns1")): TxnTable =
+    txn.catalog
+      .asInstanceOf[TxnTableCatalog]
+      .loadTable(Identifier.of(namespace, tableName))
+      .asInstanceOf[TxnTable]
+
   private def verifySqlScriptResult(
       sqlText: String,
       expected: Seq[Row],
@@ -174,6 +197,188 @@ class SqlScriptingE2eSuite extends SharedSparkSession {
     }
   }
 
+  test("multi statement with transactional checks - insert then delete") {
+    withCatalog("cat") { catalog =>
+      withTable("cat.ns1.t") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 'software');
+            |  DELETE FROM cat.ns1.t
+            |    WHERE pk IN (SELECT pk FROM cat.ns1.t WHERE dep = 'hr');
+            |  SELECT * FROM cat.ns1.t;
+            |END
+            |""".stripMargin
+
+        verifySqlScriptResult(sqlScript, Seq(Row(2, 200, "software")))
+
+        // Each DML statement in a script runs in its own independent QE and transaction.
+        assert(catalog.observedTransactions.size === 2)
+        assert(catalog.observedTransactions.forall(t =>
+          t.currentState === Committed && t.isClosed))
+
+        // The DELETE subquery scans the table with a dep='hr' predicate; verify it was tracked.
+        val deleteTxnTable = loadTxnTable(catalog.observedTransactions(1), "t")
+        assert(deleteTxnTable.scanEvents.flatten.exists {
+          case sources.EqualTo("dep", "hr") => true
+          case _ => false
+        })
+      }
+    }
+  }
+
+  test("multi statement with transactional checks - second statement fails") {
+    withCatalog("cat") { catalog =>
+      withTable("cat.ns1.t") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 'software');
+            |  DELETE FROM cat.ns1.t WHERE nonexistent_column = 1;
+            |END
+            |""".stripMargin
+
+        checkError(
+          exception = intercept[AnalysisException] {
+            spark.sql(sqlScript).collect()
+          },
+          condition = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+          parameters = Map(
+            "objectName" -> "`nonexistent_column`",
+            "proposal" -> ".*"),
+          matchPVals = true,
+          queryContext = Array(ExpectedContext("nonexistent_column")))
+
+        // INSERT committed; DELETE was aborted because analysis failed on the bad column.
+        assert(catalog.observedTransactions.size === 2)
+        assert(catalog.observedTransactions(0).currentState === Committed)
+        assert(catalog.observedTransactions(0).isClosed)
+        assert(catalog.observedTransactions(1).currentState === Aborted)
+        assert(catalog.observedTransactions(1).isClosed)
+        assert(catalog.lastTransaction.currentState === Aborted)
+      }
+    }
+  }
+
+  test("multi statement with transactional checks - insert, merge, update") {
+    withCatalog("cat") { catalog =>
+      withTable("cat.ns1.t", "cat.ns1.src") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  CREATE TABLE cat.ns1.src (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 'software'), (3, 300, 'hr');
+            |  INSERT INTO cat.ns1.src VALUES (1, 150, 'hr'), (4, 400, 'finance');
+            |  MERGE INTO cat.ns1.t AS t
+            |    USING cat.ns1.src AS s
+            |    ON t.pk = s.pk
+            |    WHEN MATCHED THEN UPDATE SET salary = s.salary
+            |    WHEN NOT MATCHED THEN INSERT (pk, salary, dep)
+            |      VALUES (s.pk, s.salary, s.dep);
+            |  UPDATE cat.ns1.t SET salary = salary + 50 WHERE dep = 'software';
+            |  SELECT * FROM cat.ns1.t ORDER BY pk;
+            |END
+            |""".stripMargin
+
+        verifySqlScriptResult(
+          sqlScript,
+          Seq(
+            Row(1, 150, "hr"),
+            Row(2, 250, "software"),
+            Row(3, 300, "hr"),
+            Row(4, 400, "finance")))
+
+        // INSERT (x2), MERGE, and UPDATE each run in their own independent QE and transaction.
+        assert(catalog.observedTransactions.size === 4)
+        assert(catalog.observedTransactions.forall(t => t.currentState === Committed && t.isClosed))
+
+        def txnTable(txnIdx: Int): TxnTable =
+          loadTxnTable(catalog.observedTransactions(txnIdx), "t")
+
+        // Both inserts are pure writes - no scan.
+        assert(txnTable(0).scanEvents.isEmpty)
+        assert(txnTable(1).scanEvents.isEmpty)
+
+        // MERGE scans the full target table. The join is on pk (not the partition column).
+        assert(txnTable(2).scanEvents.nonEmpty)
+        assert(txnTable(2).scanEvents.flatten.isEmpty)
+
+        // UPDATE with WHERE dep='software' pushes an equality predicate on the partition column.
+        assert(txnTable(3).scanEvents.flatten.exists {
+          case sources.EqualTo("dep", "software") => true
+          case _ => false
+        })
+      }
+    }
+  }
+
+  test("loop with transactional checks - each iteration runs in its own transaction") {
+    withCatalog("cat") { catalog =>
+      withTable("cat.ns1.t") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  DECLARE i INT = 1;
+            |  CREATE TABLE
+            |    cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  WHILE i <= 3 DO
+            |    INSERT INTO cat.ns1.t VALUES (i, i * 100, 'hr');
+            |    SET i = i + 1;
+            |  END WHILE;
+            |  SELECT * FROM cat.ns1.t ORDER BY pk;
+            |END
+            |""".stripMargin
+
+        verifySqlScriptResult(
+          sqlScript,
+          Seq(Row(1, 100, "hr"), Row(2, 200, "hr"), Row(3, 300, "hr")))
+
+        // Each loop iteration's INSERT runs in its own independent transaction.
+        assert(catalog.observedTransactions.size === 3)
+        assert(catalog.observedTransactions.forall(t => t.currentState === Committed && t.isClosed))
+      }
+    }
+  }
+
+  test("continue handler with transactional checks - handler DML runs in its own transaction") {
+    withCatalog("cat") { catalog =>
+      withTable("cat.ns1.t") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  DECLARE CONTINUE HANDLER FOR DIVIDE_BY_ZERO
+            |  BEGIN
+            |    INSERT INTO cat.ns1.t VALUES (-1, -1, 'error');
+            |  END;
+            |  CREATE TABLE
+            |    cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  INSERT INTO cat.ns1.t VALUES (1, 100, 'hr');
+            |  SELECT 1/0;
+            |  INSERT INTO cat.ns1.t VALUES (2, 200, 'software');
+            |  SELECT * FROM cat.ns1.t ORDER BY pk;
+            |END
+            |""".stripMargin
+
+        verifySqlScriptResult(
+          sqlScript,
+          Seq(Row(-1, -1, "error"), Row(1, 100, "hr"), Row(2, 200, "software")))
+
+        // INSERT(1), handler INSERT(-1), INSERT(2) - each in its own transaction.
+        assert(catalog.observedTransactions.size === 3)
+        assert(catalog.observedTransactions.forall(t => t.currentState === Committed && t.isClosed))
+      }
+    }
+  }
+
   test("script without result statement") {
     val sqlScript =
       """
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala
index 9a691d4430ef9..9e9991774992c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala
@@ -3691,7 +3691,9 @@ class SqlScriptingExecutionSuite extends SharedSparkSession {
       sqlState = "42883",
       parameters = Map(
         "variableName" -> toSQLId("LOCALVAR"),
-        "searchPath" -> toSQLId("SYSTEM.SESSION"))
+        "searchPath" ->
+          "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]"),
+      context = ExpectedContext("LOCALVAR", 54, 61)
     )
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index 0e33b271522dc..a6067aaf189e6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -598,6 +598,7 @@ trait StreamTest extends SharedSparkSession with TimeLimits {
             sparkSession
               .streams
               .startQuery(
+                None,
                 None,
                 Some(metadataRoot),
                 stream,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala
index 89f6556229527..e2c74533e7f3c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala
@@ -28,9 +28,10 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
 import org.apache.spark.sql.connector.{FakeV2Provider, FakeV2ProviderWithCustomSchema, InMemoryTableSessionCatalog}
-import org.apache.spark.sql.connector.catalog.{Column, Identifier, InMemoryTableCatalog, MetadataColumn, SupportsMetadataColumns, SupportsRead, Table, TableCapability, TableInfo, V2TableWithV1Fallback}
+import org.apache.spark.sql.connector.catalog.{Column, Identifier, InMemoryTable, InMemoryTableCatalog, MetadataColumn, SupportsMetadataColumns, SupportsRead, Table, TableCapability, TableInfo, V2TableWithV1Fallback}
 import org.apache.spark.sql.connector.expressions.{ClusterByTransform, FieldReference, Transform}
-import org.apache.spark.sql.connector.read.ScanBuilder
+import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownRequiredColumns}
+import org.apache.spark.sql.connector.read.streaming.MicroBatchStream
 import org.apache.spark.sql.execution.streaming.runtime.{MemoryStream, MemoryStreamScanBuilder, StreamingQueryWrapper}
 import org.apache.spark.sql.functions.lit
 import org.apache.spark.sql.internal.SQLConf
@@ -109,20 +110,23 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter {
   }
 
   test("read: read table without streaming capability support") {
-    val tableIdentifier = "testcat.table_name"
+    withSQLConf("spark.sql.catalog.testcat" ->
+        classOf[DataStreamTableAPISuite.NonStreamingInMemoryTableCatalog].getName) {
+      val tableIdentifier = "testcat.table_name"
 
-    spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo")
+      spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo")
 
-    checkError(
-      exception = intercept[AnalysisException] {
-        spark.readStream.table(tableIdentifier)
-      },
-      condition = "UNSUPPORTED_FEATURE.TABLE_OPERATION",
-      parameters = Map(
-        "tableName" -> "`testcat`.`table_name`",
-        "operation" -> "either micro-batch or continuous scan"
+      checkError(
+        exception = intercept[AnalysisException] {
+          spark.readStream.table(tableIdentifier)
+        },
+        condition = "UNSUPPORTED_FEATURE.TABLE_OPERATION",
+        parameters = Map(
+          "tableName" -> "`testcat`.`table_name`",
+          "operation" -> "either micro-batch or continuous scan"
+        )
       )
-    )
+    }
   }
 
   test("read: read table with custom catalog") {
@@ -545,6 +549,42 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter {
     }
   }
 
+  test("SPARK-56132: pruneColumns called on SupportsPushDownRequiredColumns " +
+      "V2 streaming scan builder") {
+    val tblName = "teststream.table_name"
+    withTable(tblName) {
+      spark.sql(s"CREATE TABLE $tblName (data int) USING foo")
+      val stream = MemoryStream[Int]
+      val testCatalog = spark.sessionState.catalogManager.catalog("teststream").asTableCatalog
+      val table = testCatalog.loadTable(Identifier.of(Array(), "table_name"))
+        .asInstanceOf[InMemoryStreamTable]
+      table.setStream(stream)
+
+      // Wrap the table's scan builder so we can record pruneColumns calls.
+      val recorded = new PrunedSchemaRecorder
+      table.scanBuilderWrapper = Some(inner => new RecordingPruneScanBuilder(inner, recorded))
+
+      withTempDir { checkpointDir =>
+        val q = spark.readStream.table(tblName)
+          .select("value", "_seq")
+          .writeStream.format("noop")
+          .option("checkpointLocation", checkpointDir.getCanonicalPath)
+          .start()
+        try {
+          // logicalPlan is initialized lazily when the query thread starts; wait for it.
+          eventually(timeout(streamingTimeout)) {
+            assert(recorded.called,
+              "pruneColumns should have been called on the streaming scan builder")
+          }
+          assert(recorded.schema.fieldNames.toSet === Set("value", "_seq"),
+            s"Expected pruneColumns to receive {value, _seq}, got ${recorded.schema}")
+        } finally {
+          q.stop()
+        }
+      }
+    }
+  }
+
   private def checkForStreamTable(dir: Option[File], tableName: String): Unit = {
     val memory = MemoryStream[Int]
     val dsw = memory.toDS().writeStream.format("parquet")
@@ -638,6 +678,25 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter {
 
 object DataStreamTableAPISuite {
   val V1FallbackTestTableName = "fallbackV1Test"
+
+  class NonStreamingInMemoryTableCatalog extends InMemoryTableCatalog {
+    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+
+    override def createTable(ident: Identifier, tableInfo: TableInfo): Table = {
+      if (tables.containsKey(ident)) {
+        throw new TableAlreadyExistsException(ident.asMultipartIdentifier)
+      }
+      val tableName = s"$name.${ident.quoted}"
+      val table = new InMemoryTable(tableName, tableInfo.columns(), tableInfo.partitions(),
+          tableInfo.properties, tableInfo.constraints()) {
+        override def baseCapabiilities: Set[TableCapability] =
+          super.baseCapabiilities - TableCapability.MICRO_BATCH_READ
+      }
+      tables.put(ident, table)
+      namespaces.putIfAbsent(ident.namespace.toList, Map())
+      table
+    }
+  }
 }
 
 class InMemoryStreamTable(override val name: String)
@@ -645,6 +704,7 @@ class InMemoryStreamTable(override val name: String)
   with SupportsRead
   with SupportsMetadataColumns {
   var stream: MemoryStream[Int] = _
+  var scanBuilderWrapper: Option[MemoryStreamScanBuilder => ScanBuilder] = None
 
   def setStream(inputData: MemoryStream[Int]): Unit = stream = inputData
 
@@ -655,7 +715,8 @@ class InMemoryStreamTable(override val name: String)
   }
 
   override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
-    new MemoryStreamScanBuilder(stream)
+    val inner = new MemoryStreamScanBuilder(stream)
+    scanBuilderWrapper.map(_(inner)).getOrElse(inner)
   }
 
   private object SeqColumn extends MetadataColumn {
@@ -667,6 +728,36 @@ class InMemoryStreamTable(override val name: String)
   override val metadataColumns: Array[MetadataColumn] = Array(SeqColumn)
 }
 
+class PrunedSchemaRecorder {
+  @volatile var called = false
+  @volatile var schema: StructType = new StructType()
+}
+
+class RecordingPruneScanBuilder(inner: MemoryStreamScanBuilder, recorder: PrunedSchemaRecorder)
+    extends ScanBuilder
+    with SupportsPushDownRequiredColumns {
+
+  override def pruneColumns(requiredSchema: StructType): Unit = {
+    recorder.called = true
+    recorder.schema = requiredSchema
+  }
+
+  override def build(): Scan = {
+    val innerScan = inner.build()
+    val prunedSchema = recorder.schema
+    // Return a scan whose readSchema() reflects the pruned schema so the streaming plan
+    // and scan agree on output columns. Without the fix, pruneColumns is never called and
+    // readSchema() defaults to the full table schema, causing ArrayIndexOutOfBoundsException
+    // when metadata columns are in the plan output but absent from the scan output.
+    new Scan {
+      override def readSchema(): StructType =
+        if (recorder.called) prunedSchema else innerScan.readSchema()
+      override def toMicroBatchStream(checkpointLocation: String): MicroBatchStream =
+        innerScan.toMicroBatchStream(checkpointLocation)
+    }
+  }
+}
+
 class NonStreamV2Table(override val name: String)
     extends Table with SupportsRead with V2TableWithV1Fallback {
   override def schema(): StructType = StructType(Nil)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/StreamingSinkEvolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/StreamingSinkEvolutionSuite.scala
new file mode 100644
index 0000000000000..a242faabaf921
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/StreamingSinkEvolutionSuite.scala
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.streaming.test
+
+import org.scalatest.{BeforeAndAfterEach, Tag}
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql._
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.streaming.StreamTest
+import org.apache.spark.util.Utils
+
+/**
+ * Test suite for streaming sink evolution features including:
+ * - Sink naming via DataStreamWriter.name()
+ * - Sink name validation
+ * - Sink evolution enforcement
+ */
+class StreamingSinkEvolutionSuite extends StreamTest with BeforeAndAfterEach {
+  import testImplicits._
+
+  private def newMetadataDir =
+    Utils.createTempDir(namePrefix = "streaming.metadata").getCanonicalPath
+
+  override def afterEach(): Unit = {
+    spark.streams.active.foreach(_.stop())
+    super.afterEach()
+  }
+
+  // =========================
+  // Sink Name Validation Tests
+  // =========================
+
+  testWithSinkEvolution("invalid sink name - contains hyphen") {
+    val input = MemoryStream[Int]
+    input.addData(1, 2, 3)
+    checkError(
+      exception = intercept[AnalysisException] {
+        input.toDF().writeStream
+          .format("noop")
+          .name("my-sink")
+          .option("checkpointLocation", newMetadataDir)
+          .start()
+      },
+      condition = "STREAMING_QUERY_EVOLUTION_ERROR.INVALID_SINK_NAME",
+      parameters = Map("sinkName" -> "my-sink"))
+  }
+
+  testWithSinkEvolution("invalid sink name - contains space") {
+    val input = MemoryStream[Int]
+    input.addData(1, 2, 3)
+    checkError(
+      exception = intercept[AnalysisException] {
+        input.toDF().writeStream
+          .format("noop")
+          .name("my sink")
+          .option("checkpointLocation", newMetadataDir)
+          .start()
+      },
+      condition = "STREAMING_QUERY_EVOLUTION_ERROR.INVALID_SINK_NAME",
+      parameters = Map("sinkName" -> "my sink"))
+  }
+
+  testWithSinkEvolution("invalid sink name - contains special characters") {
+    val input = MemoryStream[Int]
+    input.addData(1, 2, 3)
+    checkError(
+      exception = intercept[AnalysisException] {
+        input.toDF().writeStream
+          .format("noop")
+          .name("my.sink@123!")
+          .option("checkpointLocation", newMetadataDir)
+          .start()
+      },
+      condition = "STREAMING_QUERY_EVOLUTION_ERROR.INVALID_SINK_NAME",
+      parameters = Map("sinkName" -> "my.sink@123!"))
+  }
+
+  testWithSinkEvolution("valid sink names - various patterns") {
+    Seq("mySink", "my_sink", "MySink123", "_private", "sink_123_test", "123sink")
+      .foreach { sinkName =>
+        val checkpointDir = newMetadataDir
+        val input = MemoryStream[Int]
+        input.addData(1, 2, 3)
+        val q = input.toDF().writeStream
+          .format("noop")
+          .name(sinkName)
+          .option("checkpointLocation", checkpointDir)
+          .start()
+        q.processAllAvailable()
+        q.stop()
+      }
+  }
+
+  // ===========================
+  // Sink Evolution Enforcement
+  // ===========================
+
+  testWithSinkEvolution("unnamed sink with sink evolution enabled throws error") {
+    val input = MemoryStream[Int]
+    input.addData(1, 2, 3)
+    val exception = intercept[SparkException] {
+      val q = input.toDF().writeStream
+        .format("noop")
+        // No .name() call - sink is unnamed
+        .option("checkpointLocation", newMetadataDir)
+        .start()
+      q.processAllAvailable()
+      q.stop()
+    }
+
+    checkError(
+      exception = exception,
+      condition = "STREAMING_QUERY_EVOLUTION_ERROR.UNNAMED_STREAMING_SINKS_WITH_ENFORCEMENT",
+      parameters = Map.empty)
+  }
+
+  test("unnamed sink without sink evolution enabled uses default name") {
+    withSQLConf(
+      SQLConf.ENABLE_STREAMING_SINK_EVOLUTION.key -> "false") {
+      val input = MemoryStream[Int]
+      input.addData(1, 2, 3)
+      // Should succeed - no name required when sink evolution is disabled
+      val q = input.toDF().writeStream
+        .format("noop")
+        .option("checkpointLocation", newMetadataDir)
+        .start()
+      q.processAllAvailable()
+      q.stop()
+    }
+  }
+
+  testWithSinkEvolution("named sink succeeds with sink evolution enabled") {
+    val input = MemoryStream[Int]
+    input.addData(1, 2, 3)
+    val q = input.toDF().writeStream
+      .format("noop")
+      .name("my_sink")
+      .option("checkpointLocation", newMetadataDir)
+      .start()
+    q.processAllAvailable()
+    q.stop()
+  }
+
+  testWithSinkEvolution("continuing with same sink name works") {
+    val checkpointDir = newMetadataDir
+    val input = MemoryStream[Int]
+
+    // Start with my_sink
+    input.addData(1, 2, 3)
+    val q1 = input.toDF().writeStream
+      .format("noop")
+      .name("my_sink")
+      .option("checkpointLocation", checkpointDir)
+      .start()
+    q1.processAllAvailable()
+    q1.stop()
+
+    // Restart with same sink name - should work
+    input.addData(4, 5, 6)
+    val q2 = input.toDF().writeStream
+      .format("noop")
+      .name("my_sink")
+      .option("checkpointLocation", checkpointDir)
+      .start()
+    q2.processAllAvailable()
+    q2.stop()
+  }
+
+  // ==============
+  // Helper Methods
+  // ==============
+
+  /**
+   * Helper method to run tests with sink evolution enabled.
+   */
+  def testWithSinkEvolution(testName: String, testTags: Tag*)(testBody: => Any): Unit = {
+    test(testName, testTags: _*) {
+      withSQLConf(
+        SQLConf.ENABLE_STREAMING_SINK_EVOLUTION.key -> "true") {
+        testBody
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala
index 8b4698ba9da08..73bb0e270ca60 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/types/GeographyTypeSuite.scala
@@ -23,7 +23,7 @@ import org.json4s.JsonAST.JString
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.SparkIllegalArgumentException
-import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalGeographyType}
+import org.apache.spark.sql.catalyst.types.{PhysicalBinaryViewType, PhysicalDataType}
 
 class GeographyTypeSuite extends SparkFunSuite {
 
@@ -242,7 +242,7 @@ class GeographyTypeSuite extends SparkFunSuite {
     }
   }
 
-  test("PhysicalDataType maps GeographyType to PhysicalGeographyType") {
+  test("PhysicalDataType maps GeographyType to PhysicalBinaryViewType") {
     val geometryTypes: Seq[DataType] = Seq(
       GeographyType(4326),
       GeographyType(4267),
@@ -252,7 +252,7 @@ class GeographyTypeSuite extends SparkFunSuite {
     )
     geometryTypes.foreach { geometryType =>
       val pdt = PhysicalDataType(geometryType)
-      assert(pdt.isInstanceOf[PhysicalGeographyType])
+      assert(pdt.isInstanceOf[PhysicalBinaryViewType])
     }
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala
index 7e124ae130a4e..b7386b259557a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/types/GeometryTypeSuite.scala
@@ -23,7 +23,7 @@ import org.json4s.JsonAST.JString
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.SparkIllegalArgumentException
-import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalGeometryType}
+import org.apache.spark.sql.catalyst.types.{PhysicalBinaryViewType, PhysicalDataType}
 
 class GeometryTypeSuite extends SparkFunSuite {
 
@@ -212,7 +212,7 @@ class GeometryTypeSuite extends SparkFunSuite {
     }
   }
 
-  test("PhysicalDataType maps GeometryType to PhysicalGeometryType") {
+  test("PhysicalDataType maps GeometryType to PhysicalBinaryViewType") {
     val geometryTypes: Seq[DataType] = Seq(
       GeometryType(0),
       GeometryType(3857),
@@ -225,7 +225,7 @@ class GeometryTypeSuite extends SparkFunSuite {
     )
     geometryTypes.foreach { geometryType =>
       val pdt = PhysicalDataType(geometryType)
-      assert(pdt.isInstanceOf[PhysicalGeometryType])
+      assert(pdt.isInstanceOf[PhysicalBinaryViewType])
     }
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala
index e375678157b2f..d6a5008f57494 100644
--- a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.execution.metric.SQLMetricsTestUtils
+import org.apache.spark.sql.execution.ui.SQLExecutionUIData
 import org.apache.spark.sql.internal.SQLConf.ADAPTIVE_EXECUTION_ENABLED
 import org.apache.spark.sql.test.SharedSparkSession
 
@@ -216,6 +217,119 @@ class SqlResourceWithActualMetricsSuite
     }
   }
 
+  test("SPARK-56811: sqlTable groups sub-executions under their root execution") {
+    // CACHE TABLE produces a root execution plus an inner sub-execution that
+    // shares its rootExecutionId. This is the canonical case where the SQL
+    // listing should fold the sub row under the root rather than flattening it.
+    spark.sql("CREATE OR REPLACE TEMP VIEW spark_56811 AS SELECT id FROM RANGE(10)")
+      .collect()
+    spark.sql("CACHE TABLE spark_56811_cached AS SELECT * FROM spark_56811").collect()
+    try {
+      eventually(timeout(10.seconds), interval(1.second)) {
+        val baseUrl = spark.sparkContext.ui.get.webUrl +
+          s"/api/v1/applications/${spark.sparkContext.applicationId}/sql/sqlTable"
+
+        // Grouping ON: roots only, with subExecutions embedded on the root that
+        // owns a sub-execution.
+        val groupedUrl = new URI(
+          s"$baseUrl?start=0&length=100&draw=1&groupSubExecution=true").toURL
+        val (groupedCode, groupedOpt, _) = getContentAndCode(groupedUrl)
+        assert(groupedCode === HttpServletResponse.SC_OK)
+        val groupedJson = JsonMethods.parse(groupedOpt.get)
+        val groupedRecordsTotal = (groupedJson \ "recordsTotal").extract[Long]
+        val groupedRecordsFiltered = (groupedJson \ "recordsFiltered").extract[Long]
+        val groupedRows = (groupedJson \ "aaData").children
+        assert(groupedRecordsTotal === groupedRows.size,
+          "with no filter, recordsTotal should match returned root count")
+        assert(groupedRecordsFiltered === groupedRows.size,
+          "with no filter, recordsFiltered should match returned root count")
+        // Every row in grouped mode is either a true root (id == rootExecutionId)
+        // or an orphan sub whose real parent is absent from the result set.
+        val visibleIds = groupedRows.map(r => (r \ "id").extract[Long]).toSet
+        groupedRows.foreach { row =>
+          val id = (row \ "id").extract[Long]
+          val rootId = (row \ "rootExecutionId").extract[Long]
+          assert(id == rootId || !visibleIds.contains(rootId),
+            s"grouped row $id (rootId=$rootId) is neither a root nor an orphan")
+        }
+        val rootsWithSubs = groupedRows.filter { row =>
+          (row \ "subExecutions").children.nonEmpty
+        }
+        assert(rootsWithSubs.nonEmpty,
+          "CACHE TABLE should produce at least one root with sub-executions")
+        rootsWithSubs.foreach { row =>
+          val rootId = (row \ "id").extract[Long]
+          (row \ "subExecutions").children.foreach { sub =>
+            assert((sub \ "rootExecutionId").extract[Long] === rootId,
+              "sub-execution should reference its parent root")
+            assert((sub \ "id").extract[Long] !== rootId,
+              "sub-execution must not have the same id as its root")
+          }
+        }
+
+        // Grouping OFF: flat list of every execution, with no embedded subs.
+        val flatUrl = new URI(
+          s"$baseUrl?start=0&length=100&draw=2&groupSubExecution=false").toURL
+        val (flatCode, flatOpt, _) = getContentAndCode(flatUrl)
+        assert(flatCode === HttpServletResponse.SC_OK)
+        val flatJson = JsonMethods.parse(flatOpt.get)
+        val flatRows = (flatJson \ "aaData").children
+        assert(flatRows.size > groupedRows.size,
+          "flat listing should contain at least one extra sub-execution row")
+        val embeddedSubs = groupedRows.map(r => (r \ "subExecutions").children.size).sum
+        assert(flatRows.size === groupedRows.size + embeddedSubs,
+          "flat size should equal grouped roots plus embedded sub rows")
+        flatRows.foreach { row =>
+          assert((row \ "subExecutions").children.isEmpty,
+            "flat listing should not embed subExecutions")
+        }
+      }
+    } finally {
+      spark.sql("UNCACHE TABLE IF EXISTS spark_56811_cached")
+    }
+  }
+
+  test("SPARK-56811: partitionRoots surfaces orphan sub-executions as root rows") {
+    def mkExec(id: Long, rootId: Long): SQLExecutionUIData = new SQLExecutionUIData(
+      executionId = id,
+      rootExecutionId = rootId,
+      description = s"exec $id",
+      details = "",
+      physicalPlanDescription = "",
+      modifiedConfigs = Map.empty,
+      metrics = Seq.empty,
+      submissionTime = id,
+      completionTime = None,
+      errorMessage = None,
+      jobs = Map.empty,
+      stages = Set.empty,
+      metricValues = null,
+      queryId = null)
+
+    // Tree:
+    //   1 (root) -> 2, 3 (subs)
+    //   4 (root, no subs)
+    //   6 (sub of 5, but 5 is missing -> orphan)
+    val root1 = mkExec(1, 1)
+    val sub2 = mkExec(2, 1)
+    val sub3 = mkExec(3, 1)
+    val root4 = mkExec(4, 4)
+    val orphan6 = mkExec(6, 5)
+
+    val (roots, subsByRoot) =
+      SqlResource.partitionRoots(Seq(root1, sub2, sub3, root4, orphan6))
+
+    assert(roots.map(_.executionId).toSet === Set(1L, 4L, 6L),
+      "true roots and orphan subs should both be promoted to root rows")
+    assert(subsByRoot.keySet === Set(1L),
+      "only execs with a parent present in the input should appear in subsByRoot")
+    assert(subsByRoot(1L).map(_.executionId).toSet === Set(2L, 3L),
+      "subs should be grouped under their parent root id")
+    val orphanRow = roots.find(_.executionId == 6L).get
+    assert(orphanRow.rootExecutionId === 5L,
+      "orphan promoted to a root row preserves its original rootExecutionId")
+  }
+
   test("SPARK-56137: sqlList returns ISO date format in submissionTime") {
     withSQLConf(ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
       spark.sql("SELECT 'date_format_test'").collect()
diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py
index 13f9ae055fa73..2ae00f6db8221 100644
--- a/sql/gen-sql-functions-docs.py
+++ b/sql/gen-sql-functions-docs.py
@@ -36,7 +36,8 @@
     "bitwise_funcs", "conversion_funcs", "csv_funcs",
     "xml_funcs", "lambda_funcs", "collection_funcs",
     "url_funcs", "hash_funcs", "struct_funcs",
-    "table_funcs", "variant_funcs", "protobuf_funcs", "sketch_funcs"
+    "table_funcs", "variant_funcs", "protobuf_funcs", "sketch_funcs",
+    "st_funcs"
 }
 
 
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 0c2d5066f3d8a..7fb1e6ca6bed1 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala
index fea878bcf05d5..10f520314865d 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala
@@ -26,7 +26,7 @@ import org.apache.spark.internal.LogKeys.{HIVE_OPERATION_TYPE, STATEMENT_ID}
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER
 import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, SessionCatalog}
-import org.apache.spark.sql.catalyst.catalog.CatalogTableType.{EXTERNAL, MANAGED, VIEW}
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType.{EXTERNAL, MANAGED, METRIC_VIEW, VIEW}
 import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
 import org.apache.spark.util.Utils
 
@@ -107,7 +107,7 @@ private[hive] trait SparkOperation extends Operation with Logging {
 
   def tableTypeString(tableType: CatalogTableType): String = tableType match {
     case EXTERNAL | MANAGED => "TABLE"
-    case VIEW => "VIEW"
+    case VIEW | METRIC_VIEW => "VIEW"
     case t =>
       throw new IllegalArgumentException(s"Unknown table type is found: $t")
   }
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 0a024fb10ee01..ebe5fca9f1c44 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -41,7 +41,7 @@ import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.LogKeys._
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
-import org.apache.spark.sql.catalyst.util.SQLKeywordUtils
+import org.apache.spark.sql.catalyst.util.{SQLKeywordUtils, StringUtils}
 import org.apache.spark.sql.hive.client.HiveClientImpl
 import org.apache.spark.sql.hive.security.HiveDelegationTokenProvider
 import org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.closeHiveSessionStateIfStarted
@@ -613,110 +613,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
     }
   }
 
-  // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon.
-  // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
-  // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
-  // hence we refined this function a little bit.
-  // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
-  private[hive] def splitSemiColon(line: String): JList[String] = {
-    var insideSingleQuote = false
-    var insideDoubleQuote = false
-    var insideSimpleComment = false
-    var bracketedCommentLevel = 0
-    var escape = false
-    var beginIndex = 0
-    var leavingBracketedComment = false
-    var isStatement = false
-    val ret = new JArrayList[String]
-
-    def insideBracketedComment: Boolean = bracketedCommentLevel > 0
-    def insideComment: Boolean = insideSimpleComment || insideBracketedComment
-    def statementInProgress(index: Int): Boolean = isStatement || (!insideComment &&
-      index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
-
-    for (index <- 0 until line.length) {
-      // Checks if we need to decrement a bracketed comment level; the last character '/' of
-      // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true
-      // in the previous loop and we decrement the level here if needed.
-      if (leavingBracketedComment) {
-        bracketedCommentLevel -= 1
-        leavingBracketedComment = false
-      }
-
-      if (line.charAt(index) == '\'' && !insideComment) {
-        // take a look to see if it is escaped
-        // See the comment above about SPARK-31595
-        if (!escape && !insideDoubleQuote) {
-          // flip the boolean variable
-          insideSingleQuote = !insideSingleQuote
-        }
-      } else if (line.charAt(index) == '\"' && !insideComment) {
-        // take a look to see if it is escaped
-        // See the comment above about SPARK-31595
-        if (!escape && !insideSingleQuote) {
-          // flip the boolean variable
-          insideDoubleQuote = !insideDoubleQuote
-        }
-      } else if (line.charAt(index) == '-') {
-        val hasNext = index + 1 < line.length
-        if (insideDoubleQuote || insideSingleQuote || insideComment) {
-          // Ignores '-' in any case of quotes or comment.
-          // Avoids to start a comment(--) within a quoted segment or already in a comment.
-          // Sample query: select "quoted value --"
-          //                                    ^^ avoids starting a comment if it's inside quotes.
-        } else if (hasNext && line.charAt(index + 1) == '-') {
-          // ignore quotes and ; in simple comment
-          insideSimpleComment = true
-        }
-      } else if (line.charAt(index) == ';') {
-        if (insideSingleQuote || insideDoubleQuote || insideComment) {
-          // do not split
-        } else {
-          if (isStatement) {
-            // split, do not include ; itself
-            ret.add(line.substring(beginIndex, index))
-          }
-          beginIndex = index + 1
-          isStatement = false
-        }
-      } else if (line.charAt(index) == '\n') {
-        // with a new line the inline simple comment should end.
-        if (!escape) {
-          insideSimpleComment = false
-        }
-      } else if (line.charAt(index) == '/' && !insideSimpleComment) {
-        val hasNext = index + 1 < line.length
-        if (insideSingleQuote || insideDoubleQuote) {
-          // Ignores '/' in any case of quotes
-        } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
-          // Decrements `bracketedCommentLevel` at the beginning of the next loop
-          leavingBracketedComment = true
-        } else if (hasNext && line.charAt(index + 1) == '*') {
-          bracketedCommentLevel += 1
-        }
-      }
-      // set the escape
-      if (escape) {
-        escape = false
-      } else if (line.charAt(index) == '\\') {
-        escape = true
-      }
-
-      isStatement = statementInProgress(index)
-    }
-    // Check the last char is end of nested bracketed comment.
-    val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1
-    // Spark SQL support simple comment and nested bracketed comment in query body.
-    // But if Spark SQL receives a comment alone, it will throw parser exception.
-    // In Spark SQL CLI, if there is a completed comment in the end of whole query,
-    // since Spark SQL CLL use `;` to split the query, CLI will pass the comment
-    // to the backend engine and throw exception. CLI should ignore this comment,
-    // If there is an uncompleted statement or an uncompleted bracketed comment in the end,
-    // CLI should also pass this part to the backend engine, which may throw an exception
-    // with clear error message.
-    if (!endOfBracketedComment && (isStatement || insideBracketedComment)) {
-      ret.add(line.substring(beginIndex))
-    }
-    ret
-  }
+  // Splits SQL into individual statements by top-level semicolons. See
+  // [[StringUtils.splitSemiColonWithIndex]] for the implementation.
+  // Note: [SPARK-31595], [SPARK-33100], [SPARK-54876]
+  private[hive] def splitSemiColon(line: String): JList[String] =
+    StringUtils.splitSemiColonWithIndex(line, enableSqlScripting = false).asJava
 }
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
index 2040f8f565a2e..f6f88cf8a0121 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala
@@ -66,7 +66,9 @@ private[hive] class SparkSQLDriver(val sparkSession: SparkSession = SparkSQLEnv.
       val substitutorCommand = SQLConf.withExistingConf(sparkSession.sessionState.conf) {
         new VariableSubstitution().substitute(command)
       }
-      sparkSession.sparkContext.setJobDescription(substitutorCommand)
+      val redactedCommand =
+        Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern, substitutorCommand)
+      sparkSession.sparkContext.setJobDescription(redactedCommand)
 
       // Parse with an empty parameter context to enable pre-parsing phase that scans for
       // parameter markers. If any parameter markers (:name or ?) are found in the SQL,
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 5f25ca02d2c45..5debfd168404c 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -674,7 +674,18 @@ class CliSuite extends SparkFunSuite {
       "SELECT /* comment */  1;" -> Seq("SELECT /* comment */  1"),
       "-- comment " -> Seq(),
       "-- comment \nSELECT 1" -> Seq("-- comment \nSELECT 1"),
-      "/*  comment */  " -> Seq()
+      "/*  comment */  " -> Seq(),
+      // SPARK-54876: statement after semicolon ending with block comment should not be dropped
+      "SELECT 1; SELECT 2 /* comment */" -> Seq("SELECT 1", " SELECT 2 /* comment */"),
+      // SPARK-54876: line comment followed by block comment should produce empty result
+      "-- foo\n/* bar */" -> Seq(),
+      "SELECT 1; -- foo\n /* bar */" -> Seq("SELECT 1"),
+      // SPARK-54876: nested block comments
+      "SELECT 1; /* outer /* inner */ */" -> Seq("SELECT 1"),
+      // SPARK-54876: preceding closed block comment + line comment (no SQL statement)
+      "/* a */ -- foo\n/* b */" -> Seq(),
+      // SPARK-54876: semicolons inside backtick-quoted identifiers are not split points
+      "SELECT * FROM `t;a`; SELECT 1" -> Seq("SELECT * FROM `t;a`", " SELECT 1")
     ).foreach { case (query, ret) =>
       assert(cli.splitSemiColon(query).asScala === ret)
     }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
new file mode 100644
index 0000000000000..f42970ede6694
--- /dev/null
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriverSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.thriftserver
+
+import org.apache.spark.SparkContext
+import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.util.Utils.REDACTION_REPLACEMENT_TEXT
+
+class SparkSQLDriverSuite extends SharedSparkSession {
+
+  test("job description should be redacted by spark.sql.redaction.string.regex") {
+    withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> "password=([^\\s]+)") {
+      var jobDescription: String = null
+      spark.sparkContext.addSparkListener(new SparkListener {
+        override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
+          jobDescription =
+            jobStart.properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION)
+        }
+      })
+
+      val driver = new SparkSQLDriver(spark)
+      try {
+        driver.run("SELECT 'password=secret123'")
+      } finally {
+        driver.close()
+      }
+
+      spark.sparkContext.listenerBus.waitUntilEmpty()
+      assert(jobDescription != null)
+      assert(!jobDescription.contains("secret123"))
+      assert(jobDescription.contains(REDACTION_REPLACEMENT_TEXT))
+    }
+  }
+}
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index c9696a1b2fe68..1ecf5b3dae4a0 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
       val sessionHandle = client.openSession(user, "")
       val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS)
       // scalastyle:off line.size.limit
-      assert(infoValue.getStringValue == "ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATABASE,CURRENT_DATE,CURRENT_PATH,CURRENT_SCHEMA,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PATH,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE")
+      assert(infoValue.getStringValue == "ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,APPROX,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BERNOULLI,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATABASE,CURRENT_DATE,CURRENT_PATH,CURRENT_SCHEMA,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_PATH,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTANCE,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXACT,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEAREST,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PATH,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUALIFY,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SIMILARITY,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM,SYSTEM_PATH,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE")
       // scalastyle:on line.size.limit
     }
   }
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 1cf81ed3bb2b8..a52842ab52a81 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index a921f3eaff11b..745b45934fc0e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -274,7 +274,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         // Spark-created views do not have to be Hive compatible. If the data type is not
         // Hive compatible, we can set schema to empty so that Spark can still read this
         // view as the schema is also encoded in the table properties.
-        case schema if tableDefinition.tableType == CatalogTableType.VIEW &&
+        case schema if tableDefinition.isViewLike &&
             schema.exists(f => !isHiveCompatibleDataType(f.dataType)) =>
           EMPTY_DATA_SCHEMA
         case other => other
@@ -294,7 +294,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       try {
         client.createTable(tableWithDataSourceProps, ignoreIfExists)
       } catch {
-        case NonFatal(e) if tableDefinition.tableType == CatalogTableType.VIEW &&
+        case NonFatal(e) if tableDefinition.isViewLike &&
             hiveCompatibleSchema != EMPTY_DATA_SCHEMA =>
           // If for some reason we fail to store the schema we store it as empty there
           // since we already store the real schema in the table properties. This try-catch
@@ -450,6 +450,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     val properties = new mutable.HashMap[String, String]
 
     properties.put(CREATED_SPARK_VERSION, table.createVersion)
+
+    // Hive's `HiveTableType` enum has no metric-view variant -- it stores both regular views
+    // and metric views as `VIRTUAL_VIEW`. Persist a property marker so `restoreTableMetadata`
+    // can lift the round-tripped `CatalogTableType.VIEW` back to `CatalogTableType.METRIC_VIEW`.
+    if (table.tableType == CatalogTableType.METRIC_VIEW) {
+      properties.put(CatalogTable.VIEW_SUB_TYPE, CatalogTable.VIEW_SUB_TYPE_METRIC_VIEW)
+    }
     // This is for backward compatibility to Spark 2 to read tables with char/varchar created by
     // Spark 3.1. At read side, we will restore a table schema from its properties. So, we need to
     // clear the `varchar(n)` and `char(n)` and replace them with `string` as Spark 2 does not have
@@ -595,7 +602,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     requireTableExists(db, tableDefinition.identifier.table)
     verifyTableProperties(tableDefinition)
 
-    if (tableDefinition.tableType == VIEW) {
+    if (tableDefinition.isViewLike) {
       val newTableProps = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition).toMap
       val schemaWithNoCollation = removeCollation(tableDefinition.schema)
       val hiveCompatibleSchema =
@@ -834,8 +841,17 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
 
     var table = inputTable
 
+    // HMS round-trips both regular views and metric views as `HiveTableType.VIRTUAL_VIEW`,
+    // which `HiveClientImpl.getTableOption` always maps back to `CatalogTableType.VIEW`. Lift
+    // it back to `CatalogTableType.METRIC_VIEW` when the persisted sub-type marker is present.
+    if (table.tableType == VIEW &&
+        table.properties.get(CatalogTable.VIEW_SUB_TYPE)
+          .contains(CatalogTable.VIEW_SUB_TYPE_METRIC_VIEW)) {
+      table = table.copy(tableType = METRIC_VIEW)
+    }
+
     table.properties.get(DATASOURCE_PROVIDER) match {
-      case None if table.tableType == VIEW =>
+      case None if table.isViewLike =>
         // If this is a view created by Spark 2.2 or higher versions, we should restore its schema
         // from table properties.
         getSchemaFromTableProperties(table.properties).foreach { schemaFromTableProps =>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 087f68ce3c790..bf40327bd991d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -839,8 +839,9 @@ private[hive] trait HiveInspectors {
       PrimitiveObjectInspectorFactory.javaHiveIntervalDayTimeObjectInspector
     case _: YearMonthIntervalType =>
       PrimitiveObjectInspectorFactory.javaHiveIntervalYearMonthObjectInspector
-    // TODO decimal precision?
-    case DecimalType() => PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector
+    case DecimalType.Fixed(precision, scale) =>
+      PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+        new DecimalTypeInfo(precision, scale))
     case StructType(fields) =>
       ObjectInspectorFactory.getStandardStructObjectInspector(
         java.util.Arrays.asList(fields.map(f => f.name) : _*),
@@ -880,8 +881,8 @@ private[hive] trait HiveInspectors {
       getDateWritableConstantObjectInspector(value)
     case Literal(value, TimestampType) =>
       getTimestampWritableConstantObjectInspector(value)
-    case Literal(value, DecimalType()) =>
-      getDecimalWritableConstantObjectInspector(value)
+    case Literal(value, DecimalType.Fixed(precision, scale)) =>
+      getDecimalWritableConstantObjectInspector(value, precision, scale)
     case Literal(_, NullType) =>
       getPrimitiveNullWritableConstantObjectInspector
     case Literal(_, _: DayTimeIntervalType) =>
@@ -1035,9 +1036,10 @@ private[hive] trait HiveInspectors {
     PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
       TypeInfoFactory.timestampTypeInfo, getTimestampWritable(value))
 
-  private def getDecimalWritableConstantObjectInspector(value: Any): ObjectInspector =
+  private def getDecimalWritableConstantObjectInspector(
+      value: Any, precision: Int, scale: Int): ObjectInspector =
     PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
-      TypeInfoFactory.decimalTypeInfo, getDecimalWritable(value))
+      new DecimalTypeInfo(precision, scale), getDecimalWritable(value))
 
   private def getPrimitiveNullWritableConstantObjectInspector: ObjectInspector =
     PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index b71022c1c8755..4bebb6c85704a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1163,7 +1163,7 @@ private[hive] object HiveClientImpl extends Logging {
     catalogTableType match {
       case CatalogTableType.EXTERNAL => HiveTableType.EXTERNAL_TABLE
       case CatalogTableType.MANAGED => HiveTableType.MANAGED_TABLE
-      case CatalogTableType.VIEW => HiveTableType.VIRTUAL_VIEW
+      case t if CatalogTable.isViewLike(t) => HiveTableType.VIRTUAL_VIEW
       case t =>
         throw new IllegalArgumentException(
           s"Unknown table type is found at toHiveTableType: $t")
diff --git a/sql/hive/src/test/resources/conf/binding-policy-exceptions/configs-without-binding-policy-exceptions b/sql/hive/src/test/resources/conf/binding-policy-exceptions/configs-without-binding-policy-exceptions
index 4563e81d14064..96d709d838fc1 100644
--- a/sql/hive/src/test/resources/conf/binding-policy-exceptions/configs-without-binding-policy-exceptions
+++ b/sql/hive/src/test/resources/conf/binding-policy-exceptions/configs-without-binding-policy-exceptions
@@ -659,6 +659,7 @@ spark.sql.inMemoryColumnarStorage.hugeVectorReserveRatio
 spark.sql.inMemoryColumnarStorage.hugeVectorThreshold
 spark.sql.inMemoryColumnarStorage.partitionPruning
 spark.sql.inMemoryTableScanStatistics.enable
+spark.sql.insertNestedTypeCoercion.enabled
 spark.sql.join.preferSortMergeJoin
 spark.sql.json.enableExactStringParsing
 spark.sql.json.enablePartialResults
@@ -1035,6 +1036,7 @@ spark.sql.streaming.numRecentProgressUpdates
 spark.sql.streaming.offsetLog.formatVersion
 spark.sql.streaming.optimizeOneRowPlan.enabled
 spark.sql.streaming.pollingDelay
+spark.sql.streaming.queryEvolution.enableSinkEvolution
 spark.sql.streaming.queryEvolution.enableSourceEvolution
 spark.sql.streaming.ratioExtraSpaceAllowedInCheckpoint
 spark.sql.streaming.realTimeMode.allowlistCheck
@@ -1173,6 +1175,7 @@ spark.taskMetrics.trackUpdatedBlockStatuses
 spark.test.noStageRetry
 spark.testing
 spark.testing.dynamicAllocation.schedule.enabled
+spark.testing.injectShuffleFetchFailures
 spark.testing.memory
 spark.testing.nCoresPerExecutor
 spark.testing.nExecutorsPerHost
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/configaudit/SparkConfigBindingPolicySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/configaudit/SparkConfigBindingPolicySuite.scala
index 4cc077826ff77..15a89667e1105 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/configaudit/SparkConfigBindingPolicySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/configaudit/SparkConfigBindingPolicySuite.scala
@@ -36,7 +36,7 @@ class SparkConfigBindingPolicySuite extends SparkFunSuite {
 
   test("Test adding bindingPolicy to config") {
     val allConfigs = SQLConf.getConfigEntries().asScala.filter { entry =>
-      entry.key == SQLConf.VIEW_SCHEMA_EVOLUTION_PRESERVE_USER_COMMENTS.key
+      entry.key == SQLConf.PLAN_CHANGE_LOG_LEVEL.key
     }
     assert(allConfigs.head.bindingPolicy.isDefined)
     assert(allConfigs.head.bindingPolicy.get == ConfigBindingPolicy.SESSION)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
index 13c48f38e7f78..8acabd579d446 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
@@ -21,9 +21,10 @@ import java.util
 
 import org.apache.hadoop.hive.ql.udf.UDAFPercentile
 import org.apache.hadoop.hive.serde2.io.DoubleWritable
-import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory, StructObjectInspector}
+import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory, PrimitiveObjectInspector, StructObjectInspector}
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo
 import org.apache.hadoop.io.LongWritable
 
 import org.apache.spark.SparkFunSuite
@@ -251,4 +252,43 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
       unwrap(wrap(null, toInspector(Literal.create(d, dt)), dt),
         toInspector(Literal.create(d, dt))))
   }
+
+  test("SPARK-50610: toInspector(dataType) should preserve decimal precision and scale") {
+    val dt = DecimalType(18, 10)
+    val oi = toInspector(dt).asInstanceOf[PrimitiveObjectInspector]
+    val typeInfo = oi.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+    assert(typeInfo.precision() === 18)
+    assert(typeInfo.scale() === 10)
+
+    // Also verify non-default precision/scale combinations
+    val dt2 = DecimalType(10, 2)
+    val oi2 = toInspector(dt2).asInstanceOf[PrimitiveObjectInspector]
+    val typeInfo2 = oi2.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+    assert(typeInfo2.precision() === 10)
+    assert(typeInfo2.scale() === 2)
+
+    // Verify the default DecimalType also works
+    val dt3 = DecimalType.SYSTEM_DEFAULT
+    val oi3 = toInspector(dt3).asInstanceOf[PrimitiveObjectInspector]
+    val typeInfo3 = oi3.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+    assert(typeInfo3.precision() === DecimalType.MAX_PRECISION)
+    assert(typeInfo3.scale() === DecimalType.DEFAULT_SCALE)
+  }
+
+  test("SPARK-50610: toInspector(expr) should preserve decimal precision and scale for literals") {
+    val decimal = Decimal(BigDecimal("123.45"))
+    val dt = DecimalType(10, 2)
+    val literal = Literal.create(decimal, dt)
+    val oi = toInspector(literal).asInstanceOf[PrimitiveObjectInspector]
+    val typeInfo = oi.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+    assert(typeInfo.precision() === 10)
+    assert(typeInfo.scale() === 2)
+
+    // Null literal should still preserve type info
+    val nullLiteral = Literal.create(null, DecimalType(18, 10))
+    val oi2 = toInspector(nullLiteral).asInstanceOf[PrimitiveObjectInspector]
+    val typeInfo2 = oi2.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+    assert(typeInfo2.precision() === 18)
+    assert(typeInfo2.scale() === 10)
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index ff9be5ce759fe..8818983274ca0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -1141,7 +1141,7 @@ class HiveDDLSuite
           "alternative" -> "DROP TABLE",
           "operation" -> "DROP VIEW",
           "foundType" -> "MANAGED",
-          "requiredType" -> "VIEW",
+          "requiredType" -> "VIEW or METRIC_VIEW",
           "objectName" -> s"$SESSION_CATALOG_NAME.default.tab1"
         )
       )
diff --git a/sql/pipelines/pom.xml b/sql/pipelines/pom.xml
index a08c2477c18fa..e3dc230db89e5 100644
--- a/sql/pipelines/pom.xml
+++ b/sql/pipelines/pom.xml
@@ -22,7 +22,7 @@
     <parent>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-parent_2.13</artifactId>
-        <version>4.2.0.1-4.3.0-0</version>
+        <version>4.2.0.1-4.3.0-1</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
     <artifactId>spark-pipelines_2.13</artifactId>
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcReservedNames.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcReservedNames.scala
new file mode 100644
index 0000000000000..2b0f8e293e76b
--- /dev/null
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcReservedNames.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+/**
+ * Names that AutoCDC reserves for its own use, both for internal columns it inserts during
+ * reconciliation (e.g. `${prefix}metadata`, `${prefix}winning_row`) and for internal tables it
+ * manages alongside user-defined targets (e.g. the per-target auxiliary state table).
+ *
+ * A single recognizable prefix gives a single auditable answer to "what does AutoCDC own", and
+ * lets user-defined columns and tables be unambiguously distinguished from AutoCDC-managed ones.
+ */
+private[pipelines] object AutoCdcReservedNames {
+
+  /** Common reserved-name prefix shared by AutoCDC internal columns and internal tables. */
+  val prefix: String = "__spark_autocdc_"
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgs.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgs.scala
new file mode 100644
index 0000000000000..49636acc1f8f1
--- /dev/null
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgs.scala
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.apache.spark.sql.{AnalysisException, Column}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.catalyst.util.QuotingUtils
+import org.apache.spark.sql.types.StructType
+
+/**
+ * A single, unqualified column identifier (no nested path or table/alias qualifier). Backticks
+ * are consumed: "`a.b`" is stored as "a.b" in [[name]]. Use [[name]] for direct schema-fieldName
+ * comparison and [[quoted]] for APIs that re-parse identifier strings.
+ */
+case class UnqualifiedColumnName private (name: String) {
+  def quoted: String = QuotingUtils.quoteIdentifier(name)
+}
+
+object UnqualifiedColumnName {
+  def apply(nameParts: Seq[String]): UnqualifiedColumnName = {
+    if (nameParts.length != 1) {
+      throw multipartColumnIdentifierError(nameParts.mkString("."), nameParts)
+    }
+    new UnqualifiedColumnName(nameParts.head)
+  }
+
+  def apply(input: String): UnqualifiedColumnName =
+    apply(CatalystSqlParser.parseMultipartIdentifier(input))
+
+  private def multipartColumnIdentifierError(
+      columnName: String,
+      nameParts: Seq[String]
+  ): AnalysisException =
+    new AnalysisException(
+      errorClass = "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER",
+      messageParameters = Map(
+        "columnName" -> columnName,
+        "nameParts" -> nameParts.mkString(", ")
+      )
+    )
+}
+
+sealed trait ColumnSelection
+object ColumnSelection {
+
+  case class IncludeColumns(columns: Seq[UnqualifiedColumnName]) extends ColumnSelection
+  case class ExcludeColumns(columns: Seq[UnqualifiedColumnName])
+      extends ColumnSelection
+
+  /**
+   * Applies [[ColumnSelection]] to a [[StructType]] and returns the filtered schema. Field order
+   * follows the original schema; only matching fields are retained in the returned schema.
+   *
+   * @param schemaName      Logical name of the schema being filtered, surfaced in error messages
+   *                        when columns are not found (e.g. "microbatch", "target").
+   * @param schema          The schema to filter.
+   * @param columnSelection The user-provided selection. `None` is a no-op and returns `schema`
+   *                        unchanged.
+   * @param caseSensitive   Whether to match column names case-sensitively against the schema.
+   *                        Callers should derive this from the session, e.g.
+   *                        `session.sessionState.conf.caseSensitiveAnalysis`, so column matching
+   *                        stays consistent with `spark.sql.caseSensitive`.
+   */
+  def applyToSchema(
+      schemaName: String,
+      schema: StructType,
+      columnSelection: Option[ColumnSelection],
+      caseSensitive: Boolean): StructType = columnSelection match {
+    case None =>
+      // A None column selection is interpreted as a no-op.
+      schema
+    case Some(IncludeColumns(cols)) =>
+      val keepIndices = lookupFieldIndices(schemaName, schema, cols, caseSensitive)
+      StructType(schema.fields.zipWithIndex.collect {
+        case (field, idx) if keepIndices.contains(idx) => field
+      })
+    case Some(ExcludeColumns(cols)) =>
+      val dropIndices = lookupFieldIndices(schemaName, schema, cols, caseSensitive)
+      StructType(schema.fields.zipWithIndex.collect {
+        case (field, idx) if !dropIndices.contains(idx) => field
+      })
+  }
+
+  private def lookupFieldIndices(
+      schemaName: String,
+      schema: StructType,
+      fields: Seq[UnqualifiedColumnName],
+      caseSensitive: Boolean): Set[Int] = {
+    val caseAwareGetFieldIndex: String => Option[Int] =
+      if (caseSensitive) schema.getFieldIndex else schema.getFieldIndexCaseInsensitive
+
+    val fieldIndexResolutions = fields.map(f => f -> caseAwareGetFieldIndex(f.name))
+    val missingFieldNames = fieldIndexResolutions.collect { case (f, None) => f.name }.distinct
+    if (missingFieldNames.nonEmpty) {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA",
+        messageParameters = Map(
+          "caseSensitivity" -> CaseSensitivityLabels.of(caseSensitive),
+          "schemaName" -> schemaName,
+          "missingColumns" -> missingFieldNames.mkString(", "),
+          "availableColumns" -> schema.fieldNames.mkString(", ")
+        )
+      )
+    }
+    fieldIndexResolutions.flatMap { case (_, idx) => idx }.toSet
+  }
+}
+
+/** User-facing case-sensitivity labels surfaced in AutoCDC error messages. */
+private[pipelines] object CaseSensitivityLabels {
+  val CaseSensitive: String = "case-sensitive"
+  val CaseInsensitive: String = "case-insensitive"
+
+  def of(caseSensitive: Boolean): String =
+    if (caseSensitive) CaseSensitive else CaseInsensitive
+}
+
+/** The SCD (Slowly Changing Dimension) strategy for a CDC flow. */
+sealed trait ScdType {
+  /**
+   * Short, stable label for this SCD type. Persisted as table property on AutoCDC flow auxiliary
+   * tables.
+   */
+  def label: String
+}
+
+object ScdType {
+  /** Representation for the standard SCD1 strategy. */
+  case object Type1 extends ScdType {
+    override val label: String = "SCD1"
+  }
+  /** Representation for the standard SCD2 strategy. */
+  case object Type2 extends ScdType {
+    override val label: String = "SCD2"
+  }
+}
+
+/**
+ * Configuration for an AutoCDC flow.
+ *
+ * @param keys            The column(s) that uniquely identify a row in the source data.
+ * @param sequencing      Expression ordering CDC events to correctly resolve out-of-order
+ *                        arrivals. Must be a sortable type.
+ * @param deleteCondition Expression that marks a source row as a DELETE. When None, all
+ *                        rows are treated as upserts.
+ * @param storedAsScdType The SCD strategy these args should be applied to.
+ * @param columnSelection Which source columns to select in the target table. None means
+ *                        all columns.
+ */
+case class ChangeArgs(
+    keys: Seq[UnqualifiedColumnName],
+    sequencing: Column,
+    storedAsScdType: ScdType,
+    deleteCondition: Option[Column] = None,
+    columnSelection: Option[ColumnSelection] = None
+) {
+  ChangeArgs.validateNonEmptyKeys(keys)
+}
+
+object ChangeArgs {
+  /**
+   * Validates that [[ChangeArgs.keys]] is non-empty. Both SCD1 and SCD2 semantics require at
+   * least one key column to identify rows; rejecting empty key sets at construction lets
+   * downstream consumers rely on `keys.nonEmpty` without re-validating.
+   */
+  private def validateNonEmptyKeys(keys: Seq[UnqualifiedColumnName]): Unit = {
+    if (keys.isEmpty) {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_EMPTY_KEYS",
+        messageParameters = Map.empty
+      )
+    }
+  }
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala
new file mode 100644
index 0000000000000..0656a7eb91b01
--- /dev/null
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala
@@ -0,0 +1,465 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.{functions => F}
+import org.apache.spark.sql.Column
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.util.QuotingUtils
+import org.apache.spark.sql.classic.DataFrame
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.util.ArrayImplicits._
+
+/**
+ * Per-microbatch processor for SCD Type 1 AutoCDC flows, complying to the specified [[changeArgs]]
+ * configuration.
+ *
+ * @param changeArgs The CDC flow configuration.
+ * @param resolvedSequencingType The post-analysis [[DataType]] of the sequencing column, derived
+ *                               from the flow's resolved DataFrame at flow setup time.
+ */
+case class Scd1BatchProcessor(
+    changeArgs: ChangeArgs,
+    resolvedSequencingType: DataType) {
+
+  /**
+   * Reconcile a CDC microbatch into the canonical form that the auxiliary- and target-table
+   * merges consume. Composes the per-step transforms in the only order that produces correct
+   * SCD1 semantics:
+   *
+   *   1. [[deduplicateMicrobatch]]: collapse same-key events to the latest by sequence.
+   *   2. [[extendMicrobatchRowsWithCdcMetadata]]: project the operational `_cdc_metadata` column
+   *      (must run before column selection, which may drop inputs the metadata expressions
+   *      reference).
+   *   3. [[projectTargetColumnsOntoMicrobatch]]: apply the user-defined column selection while
+   *      preserving the CDC metadata column.
+   *   4. [[applyTombstonesToMicrobatch]]: filter out late-arriving events superseded by
+   *      tombstones already recorded in the auxiliary table.
+   *
+   * The per-step methods are kept package-visible so that focused unit tests can pin each
+   * transform's behavior independently. This method itself is package-visible so that
+   * [[Scd1ForeachBatchHandler]] can call it after running [[ScdBatchValidator.validateMicrobatch]]
+   * - validation is intentionally not folded in here, as it must run before any of these
+   * transforms touch the data.
+   *
+   * @param batchDf          The validated incoming CDC microbatch.
+   * @param auxiliaryTableDf A snapshot of the auxiliary table for tombstone reconciliation.
+   *                         Must contain at minimum the key columns + `_cdc_metadata`.
+   * @return The reconciled microbatch, ready to be merged onto both tables.
+   */
+  private[autocdc] def reconcileMicrobatch(
+      batchDf: DataFrame,
+      auxiliaryTableDf: DataFrame): DataFrame = {
+    val deduplicated = deduplicateMicrobatch(validatedMicrobatch = batchDf)
+    val withCdcMetadata = extendMicrobatchRowsWithCdcMetadata(validatedMicrobatch = deduplicated)
+    val projected = projectTargetColumnsOntoMicrobatch(
+      microbatchWithCdcMetadataDf = withCdcMetadata
+    )
+    applyTombstonesToMicrobatch(
+      microbatchDf = projected,
+      auxiliaryTableDf = auxiliaryTableDf
+    )
+  }
+
+  /**
+   * Deduplicate the incoming CDC microbatch by key, keeping the most recent event per key
+   * as ordered by [[ChangeArgs.sequencing]].
+   *
+   * For SCD1 we only care about the most recent (by sequence value) event per key. When
+   * multiple events share the same key and the same sequence value, the row selected is
+   * non-deterministic and undefined.
+   *
+   * @param validatedMicrobatch A microbatch that has already been validated such that the
+   *                            sequencing column should not contain null values, and its data type
+   *                            should support ordering.
+   *
+   * The schema of the returned dataframe matches the schema of the microbatch exactly.
+   */
+  private[autocdc] def deduplicateMicrobatch(validatedMicrobatch: DataFrame): DataFrame = {
+    // The `max_by` API can only return a single column, so pack/unpack the entire row into a
+    // temporary column before and after the `max_by` operation.
+    val winningRowCol = Scd1BatchProcessor.winningRowColName
+
+    val allMicrobatchColumns =
+      validatedMicrobatch.columns
+        .map(colName => F.col(QuotingUtils.quoteIdentifier(colName)))
+        .toImmutableArraySeq
+
+    validatedMicrobatch
+      .groupBy(changeArgs.keys.map(k => F.col(k.quoted)): _*)
+      .agg(
+        F.max_by(F.struct(allMicrobatchColumns: _*), changeArgs.sequencing)
+          .as(winningRowCol)
+      )
+      .select(F.col(s"$winningRowCol.*"))
+  }
+
+  /**
+   * Project the CDC metadata column onto the microbatch.
+   *
+   * This must run before any column selection is applied to the microbatch. The
+   * [[ChangeArgs.deleteCondition]] and [[ChangeArgs.sequencing]] expressions are evaluated against
+   * the current microbatch schema, and column selection may drop inputs required by those
+   * expressions.
+   *
+   * Rows are classified as deletes only when [[ChangeArgs.deleteCondition]] evaluates to true. A
+   * false or null delete condition classifies the row as an upsert.
+   *
+   * @param validatedMicrobatch A microbatch that has already been validated such that the
+   *                            sequencing column should not contain null values, and its data type
+   *                            should support ordering.
+   *
+   * The returned dataframe has all of the columns in the input microbatch + the CDC metadata
+   * column.
+   */
+  private[autocdc] def extendMicrobatchRowsWithCdcMetadata(
+      validatedMicrobatch: DataFrame): DataFrame = {
+    val rowDeleteSequence: Column = changeArgs.deleteCondition match {
+      case Some(deleteCondition) =>
+        F.when(deleteCondition, changeArgs.sequencing).otherwise(F.lit(null))
+      case None =>
+        F.lit(null)
+    }
+
+    val rowUpsertSequence: Column =
+      // A row that is not a delete must be an upsert, these are mutually exclusive and a complete
+      // set of CDC event types.
+      F.when(rowDeleteSequence.isNull, changeArgs.sequencing).otherwise(F.lit(null))
+
+    validatedMicrobatch.withColumn(
+      Scd1BatchProcessor.cdcMetadataColName,
+      Scd1BatchProcessor.constructCdcMetadataCol(
+        deleteSequence = rowDeleteSequence,
+        upsertSequence = rowUpsertSequence,
+        sequencingType = resolvedSequencingType
+      )
+    )
+  }
+
+  /**
+   * Project the user-defined column selection onto the microbatch. By this point the input
+   * microbatch should already have projected its CDC metadata, because it's possible that the
+   * user-defined column selection drops columns that are otherwise necessary to compute the
+   * CDC metadata.
+   *
+   * Returned dataframe's schema is: all of the user-selected columns in the input dataframe as per
+   * [[ChangeArgs.columnSelection]] + the CDC metadata column.
+   */
+  private[autocdc] def projectTargetColumnsOntoMicrobatch(
+      microbatchWithCdcMetadataDf: DataFrame): DataFrame = {
+    val caseSensitiveColumnComparison =
+      microbatchWithCdcMetadataDf.sparkSession.sessionState.conf.caseSensitiveAnalysis
+
+    // The user schema is the microbatch schema after dropping the system CDC metadata column.
+    // We project out the system column before applying user selection and project it back in
+    // afterwards, so that users cannot control whether this [necessary] column shows up in the
+    // target table.
+    val userColumnsInMicrobatchSchema = ColumnSelection.applyToSchema(
+      schemaName = "microbatch",
+      schema = microbatchWithCdcMetadataDf.schema,
+      columnSelection = Some(
+        ColumnSelection.ExcludeColumns(
+          Seq(UnqualifiedColumnName(Scd1BatchProcessor.cdcMetadataColName))
+        )
+      ),
+      caseSensitive = caseSensitiveColumnComparison
+    )
+
+    val userSelectedColumnsInMicrobatchSchema =
+      ColumnSelection.applyToSchema(
+        schemaName = "microbatch",
+        schema = userColumnsInMicrobatchSchema,
+        columnSelection = changeArgs.columnSelection,
+        caseSensitive = caseSensitiveColumnComparison
+      )
+
+    // In addition to the explicit user-selected columns, re-project the operational CDC metadata
+    // column as the last column.
+    val finalColumnsInMicrobatchToSelect =
+      userSelectedColumnsInMicrobatchSchema.fieldNames.map(colName => {
+        // Spark drops backticks in the schema, quote all identifiers for safety before executing
+        // select. Identifiers could have special characters such as '.'.
+        F.col(QuotingUtils.quoteIdentifier(colName))
+      }) :+ F.col(
+        Scd1BatchProcessor.cdcMetadataColName
+      )
+
+    microbatchWithCdcMetadataDf.select(
+      finalColumnsInMicrobatchToSelect.toImmutableArraySeq: _*
+    )
+  }
+
+  /**
+   * Left anti-join the microbatch with the auxiliary table on tombstones that match against and
+   * effectively delete late-arriving upserts (or stale deletes).
+   *
+   * @param microbatchDf The incoming microbatch dataframe with at minimum all of the key
+   *                     columns + CDC metadata column.
+   * @param auxiliaryTableDf Dataframe representing the auxiliary table, with at minimum the key
+   *                         columns + CDC metadata column.
+   *
+   * The returned filtered dataframe has the same schema as the input microbatch, but with only
+   * the rows that remain unaffected by any known tombstones.
+   */
+  private[autocdc] def applyTombstonesToMicrobatch(
+      microbatchDf: DataFrame,
+      auxiliaryTableDf: DataFrame): DataFrame = {
+    val aliasedMicrobatchDf = microbatchDf.alias("microbatch")
+    val aliasedAuxiliaryTableDf = auxiliaryTableDf.alias("auxiliaryTable")
+
+    val cdcMetadata = Scd1BatchProcessor.cdcMetadataColName
+
+    val microbatchCdcMetadata = F.col(s"microbatch.$cdcMetadata")
+    val effectiveSeq = F.greatest(
+      Scd1BatchProcessor.deleteSequenceOf(microbatchCdcMetadata),
+      Scd1BatchProcessor.upsertSequenceOf(microbatchCdcMetadata)
+    )
+    val tombstoneDeleteSeq =
+      Scd1BatchProcessor.deleteSequenceOf(F.col(s"auxiliaryTable.$cdcMetadata"))
+
+    val keysMatch = changeArgs.keys
+      .map { k =>
+        F.col(s"microbatch.${k.quoted}") === F.col(s"auxiliaryTable.${k.quoted}")
+      }
+      .reduce(_ && _)
+
+    // A microbatch row is considered late-arriving (and therefore deleted by the tombstone) when
+    // the auxiliary table holds a tombstone for the same key with a strictly larger delete
+    // sequence. Both late-arriving upserts and deletes are dropped.
+    val microbatchRowDeletedByTombstone = effectiveSeq < tombstoneDeleteSeq
+
+    aliasedMicrobatchDf.join(
+      right = aliasedAuxiliaryTableDf,
+      joinExprs = keysMatch && microbatchRowDeletedByTombstone,
+      joinType = "left_anti"
+    )
+  }
+
+  /**
+   * Merge the reconciled (deduplicated per key) microbatch onto the auxiliary table,
+   * advancing or deleting existing tombstones and inserting new tombstones for previously
+   * untracked keys.
+   *
+   * After the merge, the auxiliary table has the same schema as before, but with the latest
+   * tombstone data per key.
+   *
+   * @param reconciledMicrobatchDf   The deduplicated microbatch.
+   * @param auxiliaryTableIdentifier The identifier of the auxiliary table.
+   */
+  private[autocdc] def mergeMicrobatchOntoAuxiliaryTable(
+      reconciledMicrobatchDf: DataFrame,
+      auxiliaryTableIdentifier: TableIdentifier
+  ): Unit = {
+    val auxIdentQuoted = auxiliaryTableIdentifier.quotedString
+    val meta = Scd1BatchProcessor.cdcMetadataColName
+
+    // Project the reconciled microbatch down to just keys + `_cdc_metadata`; data columns are
+    // irrelevant for the auxiliary table and should not be persisted.
+    val reducedMicrobatch = reconciledMicrobatchDf
+      .select(changeArgs.keys.map(k => F.col(k.quoted)) :+ F.col(meta): _*)
+      .as("reducedMicrobatch")
+
+    val microbatchCdcMetadata: Column = F.col(s"reducedMicrobatch.`$meta`")
+    val incomingDelete: Column = Scd1BatchProcessor.deleteSequenceOf(microbatchCdcMetadata)
+    val incomingUpsert: Column = Scd1BatchProcessor.upsertSequenceOf(microbatchCdcMetadata)
+
+    val auxCdcMetadata: Column = F.col(s"$auxIdentQuoted.`$meta`")
+    val auxDelete: Column = Scd1BatchProcessor.deleteSequenceOf(auxCdcMetadata)
+
+    val doKeysMatch = changeArgs.keys
+      .map(k => F.col(s"reducedMicrobatch.${k.quoted}") === F.col(s"$auxIdentQuoted.${k.quoted}"))
+      .reduce(_ && _)
+
+    val incomingRowRepresentsDeleteEvent =
+      incomingDelete.isNotNull && (incomingUpsert.isNull || incomingDelete > incomingUpsert)
+
+    reducedMicrobatch
+      .mergeInto(auxIdentQuoted, doKeysMatch)
+      // Incoming delete is newer than the stored one: advance the high-water mark.
+      .whenMatched(
+        incomingRowRepresentsDeleteEvent && incomingDelete > auxDelete
+      )
+      .update(Map(s"$auxIdentQuoted.`$meta`" -> microbatchCdcMetadata))
+      // Incoming upsert is newer than the stored delete: the key was re-inserted after the
+      // delete, so the aux tombstone is stale - remove it to prevent unbounded growth.
+      .whenMatched(
+        !incomingRowRepresentsDeleteEvent && incomingUpsert >= auxDelete
+      )
+      .delete()
+      // New delete for a key not yet tracked, add it to auxiliary table. Note that in the
+      // reconciled microbatch, there is at most one event for key, which represents the latest
+      // known event for the key. If the latest known event is a delete, it must be a tombstone.
+      .whenNotMatched(incomingRowRepresentsDeleteEvent)
+      .insertAll()
+      .merge()
+  }
+
+  /**
+   * Merge the reconciled (deduplicated, tombstone applied, and column selection + metadata
+   * column projected) microbatch onto the target table, as per SCD1 semantics.
+   *
+   * Microbatch invariants:
+   *   - Exactly one of {upsert, delete} version is non-null, the other is null.
+   *   - There is at most one event per key, representing the latest known event for the key
+   *     across the microbatch and auxiliary table.
+   *
+   * Target table invariants:
+   *   - Target table only contains live rows; delete sequence is always null, upsert sequence
+   *     is always non-null.
+   *
+   * @param reconciledMicrobatchDf The reconciled microbatch dataframe.
+   * @param targetTableIdentifier  The identifier of the target table.
+   */
+  private[autocdc] def mergeMicrobatchOntoTarget(
+      reconciledMicrobatchDf: DataFrame,
+      targetTableIdentifier: TableIdentifier
+  ): Unit = {
+    val meta = Scd1BatchProcessor.cdcMetadataColName
+
+    val destinationTableStr = targetTableIdentifier.quotedString
+    // (Re-)alias the reconciled microbatch DF for easy reference for the remainder of the merge.
+    val microbatchDf = reconciledMicrobatchDf.as("microbatch")
+
+    val microbatchCdcMetadataCol = F.col(s"microbatch.`$meta`")
+    val destinationCdcMetadataCol =
+      F.col(s"$destinationTableStr.`$meta`")
+
+    val microbatchDeleteVersionField =
+      Scd1BatchProcessor.deleteSequenceOf(microbatchCdcMetadataCol)
+    val microbatchUpsertVersionField =
+      Scd1BatchProcessor.upsertSequenceOf(microbatchCdcMetadataCol)
+    val destinationUpsertVersionField =
+      Scd1BatchProcessor.upsertSequenceOf(destinationCdcMetadataCol)
+
+    val keysMatch = changeArgs.keys
+      .map(k =>
+        F.col(s"microbatch.${k.quoted}") === F.col(s"$destinationTableStr.${k.quoted}")
+      )
+      .reduce(_ && _)
+
+    // Upsert beats existing row if incoming upsert sequence is geq to the upsert sequence on
+    // the target.
+    val incomingWinsUpsert = microbatchUpsertVersionField.isNotNull &&
+      microbatchUpsertVersionField >= destinationUpsertVersionField
+
+    // Delete beats existing row if delete sequencing is strictly greater than the upsert
+    // sequence on the target. This is an arbitrary but deliberate choice to maintain that
+    // upserts get priority over deletes on duplicate sequencing.
+    val incomingWinsDelete = microbatchDeleteVersionField.isNotNull &&
+      microbatchDeleteVersionField > destinationUpsertVersionField
+
+    val resolver = microbatchDf.sparkSession.sessionState.conf.resolver
+    val keyNames = changeArgs.keys.map(_.name)
+
+    def constructTargetColumnAssignmentsFromMicrobatch(columnName: String): (String, Column) = {
+      // Map a column in the target table to its direct equivalent in the microbatch. Note that
+      // because of target-table schema evolution during SDP dataset materialization, the
+      // microbatch's columns are always a subset of (or equal to) the target's columns.
+      val quotedCol = QuotingUtils.quoteIdentifier(columnName)
+      s"$destinationTableStr.$quotedCol" -> F.col(s"microbatch.$quotedCol")
+    }
+
+    // Most merge implementations require that join columns are not mutated, even when the
+    // mutation would be a no-op. The remaining microbatch columns (including the CDC metadata
+    // column) are overwritten outright when the incoming upsert wins.
+    val columnsToUpdateWhenIncomingWinsUpsert: Map[String, Column] =
+      microbatchDf.columns
+        .filterNot(c => keyNames.exists(resolver(_, c)))
+        .map(constructTargetColumnAssignmentsFromMicrobatch)
+        .toMap
+
+    val columnsToInsertOnNewKey: Map[String, Column] =
+      microbatchDf.columns
+        .map(constructTargetColumnAssignmentsFromMicrobatch)
+        .toMap
+
+    microbatchDf
+      .mergeInto(destinationTableStr, keysMatch)
+      .whenMatched(incomingWinsDelete)
+      .delete()
+      .whenMatched(incomingWinsUpsert)
+      .update(columnsToUpdateWhenIncomingWinsUpsert)
+      // New key: only insert upserts; deletes for absent keys are no-ops for the target table
+      // merge, and instead would have been inserted as tombstones into the auxiliary table.
+      .whenNotMatched(microbatchDeleteVersionField.isNull)
+      // When inserting a brand new row for a new key, construct column mappings from microbatch.
+      // The microbatch's columns may be a strict subset of the target's columns -- e.g. the user
+      // narrowed `column_list` between runs, or the source DF dropped a column. The target's
+      // columns can never be a strict subset of the microbatch's, however, because SDP's schema
+      // evolution always unions old and new schemas onto the target.
+      .insert(columnsToInsertOnNewKey)
+      .merge()
+  }
+}
+
+object Scd1BatchProcessor {
+  /**
+   * Internal columns inserted by AutoCDC reconciliation. Source change-data-feed dataframes must
+   * not contain any columns starting with [[AutoCdcReservedNames.prefix]]; the invariant is
+   * enforced at [[org.apache.spark.sql.pipelines.graph.AutoCdcMergeFlow]] construction.
+   */
+  private[autocdc] val winningRowColName: String = s"${AutoCdcReservedNames.prefix}winning_row"
+  private[pipelines] val cdcMetadataColName: String = s"${AutoCdcReservedNames.prefix}metadata"
+
+  private[pipelines] val cdcDeleteSequenceFieldName: String = "deleteSequence"
+  private[pipelines] val cdcUpsertSequenceFieldName: String = "upsertSequence"
+
+  /** Project the delete sequence out of the CDC metadata column. */
+  private[autocdc] def deleteSequenceOf(cdcMetadataCol: Column): Column =
+    cdcMetadataCol.getField(cdcDeleteSequenceFieldName)
+
+  /** Project the upsert sequence out of the CDC metadata column. */
+  private[autocdc] def upsertSequenceOf(cdcMetadataCol: Column): Column =
+    cdcMetadataCol.getField(cdcUpsertSequenceFieldName)
+
+  /**
+   * Schema of the CDC metadata struct column for SCD1.
+   */
+  private[pipelines] def cdcMetadataColSchema(sequencingType: DataType): StructType =
+    StructType(
+      Seq(
+        // The sequencing of the event if it represents a delete, null otherwise.
+        StructField(cdcDeleteSequenceFieldName, sequencingType, nullable = true),
+        // The sequencing of the event if it represents an upsert, null otherwise.
+        StructField(cdcUpsertSequenceFieldName, sequencingType, nullable = true)
+      )
+    )
+
+  /**
+   * Construct the CDC metadata struct column for SCD1, following the exact schema and field
+   * ordering defined by [[cdcMetadataColSchema]].
+   */
+  private[pipelines] def constructCdcMetadataCol(
+      deleteSequence: Column,
+      upsertSequence: Column,
+      sequencingType: DataType): Column = {
+    val cdcMetadataFieldsInOrder = cdcMetadataColSchema(sequencingType).fields.map { field =>
+      val value = field.name match {
+        case `cdcDeleteSequenceFieldName` => deleteSequence
+        case `cdcUpsertSequenceFieldName` => upsertSequence
+        case other =>
+          throw SparkException.internalError(
+            s"Unable to construct SCD1 CDC metadata column due to unknown `${other}` field."
+          )
+      }
+      value.cast(field.dataType).as(field.name)
+    }
+    F.struct(cdcMetadataFieldsInOrder.toImmutableArraySeq: _*)
+  }
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1ForeachBatchHandler.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1ForeachBatchHandler.scala
new file mode 100644
index 0000000000000..c286f26c8263c
--- /dev/null
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1ForeachBatchHandler.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.classic.DataFrame
+
+/**
+ * Exposes an API to execute one SCD Type 1 AutoCDC microbatch reconciliation on a
+ * foreachBatch streaming query.
+ */
+case class Scd1ForeachBatchHandler(
+    batchProcessor: Scd1BatchProcessor,
+    auxiliaryTableIdentifier: TableIdentifier,
+    targetTableIdentifier: TableIdentifier) {
+
+  /**
+   * Process a single CDC microbatch and merge it into the auxiliary and target tables.
+   *
+   * Idempotent under same-`batchId` replay: both merges are gated on sequence inequalities,
+   * so a partial failure between them is reconciled correctly when foreachBatch retries the
+   * whole batch.
+   */
+  def execute(batchDf: DataFrame, batchId: Long): Unit = {
+    ScdBatchValidator(
+      destinationIdentifier = targetTableIdentifier,
+      changeArgs = batchProcessor.changeArgs,
+      batchDf = batchDf,
+      batchId = batchId
+    ).validateMicrobatch()
+
+    val reconciledMicrobatch = batchProcessor.reconcileMicrobatch(
+      batchDf = batchDf,
+      // Aux holds at most one row per currently-active tombstone (revived keys are GC'd
+      // by mergeMicrobatchOntoAuxiliaryTable), so it generally stays small enough for a broadcast
+      // join. Future optimizations: key-pruned reads, table format-aware clustering and tombstone
+      // TTL.
+      auxiliaryTableDf = batchDf.sparkSession.read.table(
+        auxiliaryTableIdentifier.quotedString
+      )
+    )
+
+    batchProcessor.mergeMicrobatchOntoAuxiliaryTable(
+      reconciledMicrobatchDf = reconciledMicrobatch,
+      auxiliaryTableIdentifier = auxiliaryTableIdentifier
+    )
+
+    // Failure between these two merges is safe under foreachBatch retry: the aux merge
+    // only ever mutates a tombstone when this batch's event makes it stale (strictly newer
+    // delete advances it) or redundant (`>=` upsert revives the key, GC'ing the tombstone),
+    // so on retry those preconditions no longer hold against the just-advanced aux state -
+    // the aux merge is a no-op and the target merge replays as if for the first time.
+    batchProcessor.mergeMicrobatchOntoTarget(
+      reconciledMicrobatchDf = reconciledMicrobatch,
+      targetTableIdentifier = targetTableIdentifier
+    )
+  }
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ScdBatchValidator.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ScdBatchValidator.scala
new file mode 100644
index 0000000000000..0d2f47d1c4a6e
--- /dev/null
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ScdBatchValidator.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.apache.spark.sql.{functions => F, AnalysisException, Column}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.expressions.RowOrdering
+import org.apache.spark.sql.classic.DataFrame
+
+/**
+ * Per-microbatch input validation shared by SCD merge executors. Throws with a clear,
+ * user-actionable error if the batch violates the CDC contract.
+ *
+ * @param destinationIdentifier The identifier of the target table, used for error messages.
+ * @param changeArgs The user-specified AutoCDC parameters.
+ * @param batchDf The incoming microbatch to validate.
+ * @param batchId The structured-streaming batch id, used for error messages.
+ */
+case class ScdBatchValidator(
+    destinationIdentifier: TableIdentifier,
+    changeArgs: ChangeArgs,
+    batchDf: DataFrame,
+    batchId: Long) {
+
+  /**
+   * Validates that the sequencing column is orderable and that no row has a null sequencing
+   * value or a null value in any key column. The per-row checks are folded into a single
+   * aggregation so the microbatch is scanned exactly once.
+   */
+  def validateMicrobatch(): Unit = {
+    val seqType = batchDf.select(changeArgs.sequencing).schema.head.dataType
+    if (!RowOrdering.isOrderable(seqType)) {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_MICROBATCH_VALIDATION.NON_ORDERABLE_SEQUENCE",
+        messageParameters = Map(
+          "tableName" -> destinationIdentifier.quotedString,
+          "batchId" -> batchId.toString,
+          "dataType" -> seqType.catalogString
+        )
+      )
+    }
+
+    val sequencingNullCount: Column =
+      F.count(F.when(changeArgs.sequencing.isNull, F.lit(1))).as("__autocdc_seq_null_count")
+    val perKeyNullCount: Seq[Column] = changeArgs.keys.map { key =>
+      F.count(F.when(F.col(key.quoted).isNull, F.lit(1)))
+        .as(s"__autocdc_key_null_count_${key.name}")
+    }
+    // The null count aggregations are laid out in the returned dataframe as:
+    // [# rows with null sequence, # rows with null for key1, ..., # rows with null for keyN].
+    val nullCountsResultDf =
+      batchDf.agg(sequencingNullCount, perKeyNullCount: _*).head()
+
+    val numRowsWithNullSequence = nullCountsResultDf.getLong(0)
+    if (numRowsWithNullSequence > 0) {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_MICROBATCH_VALIDATION.NULL_SEQUENCE",
+        messageParameters = Map(
+          "tableName" -> destinationIdentifier.quotedString,
+          "batchId" -> batchId.toString,
+          "nullCount" -> numRowsWithNullSequence.toString
+        )
+      )
+    }
+
+    val keysWithNullEntries = changeArgs.keys.zipWithIndex.flatMap { case (key, idx) =>
+      val rowCountForKey = nullCountsResultDf.getLong(idx + 1)
+      Option.when(rowCountForKey > 0)(key -> rowCountForKey)
+    }
+    if (keysWithNullEntries.nonEmpty) {
+      val nullKeyCounts = keysWithNullEntries
+        .map { case (key, count) => s"${key.quoted}=$count" }
+        .mkString(", ")
+
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_MICROBATCH_VALIDATION.NULL_KEY",
+        messageParameters = Map(
+          "tableName" -> destinationIdentifier.quotedString,
+          "batchId" -> batchId.toString,
+          "nullKeyCounts" -> nullKeyCounts
+        )
+      )
+    }
+  }
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala
index 38fde0bfec4a1..66f2995ee02d9 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala
@@ -176,7 +176,7 @@ private class FlowResolver(rawGraph: DataflowGraph) {
           } else {
             f
           }
-          convertResolvedToTypedFlow(flowToResolve, maybeNewFuncResult)
+          resolveFlow(flowToResolve, maybeNewFuncResult)
 
         // If the flow failed due to an UnresolvedDatasetException, it means that one of the
         // flow's inputs wasn't available. After other flows are resolved, these inputs
@@ -199,9 +199,18 @@ private class FlowResolver(rawGraph: DataflowGraph) {
       }
   }
 
-  private def convertResolvedToTypedFlow(
+  private def resolveFlow(
       flow: UnresolvedFlow,
       funcResult: FlowFunctionResult): ResolvedFlow = {
+    flow match {
+      case acf: AutoCdcFlow => new AutoCdcMergeFlow(acf, funcResult)
+      case utf: UntypedFlow => transformUntypedFlowToResolvedFlow(utf, funcResult)
+    }
+  }
+
+  private def transformUntypedFlowToResolvedFlow(
+      flow: UntypedFlow,
+      funcResult: FlowFunctionResult): ResolvedFlow = {
     flow match {
       case _ if flow.once => new AppendOnceFlow(flow, funcResult)
       case _ if funcResult.dataFrame.get.isStreaming =>
@@ -210,7 +219,7 @@ private class FlowResolver(rawGraph: DataflowGraph) {
         // then get their results overwritten.
         val mustBeAppend = rawGraph.flowsTo(flow.destinationIdentifier).size > 1
         new StreamingFlow(flow, funcResult, mustBeAppend = mustBeAppend)
-      case _: UnresolvedFlow => new CompleteFlow(flow, funcResult)
+      case _ => new CompleteFlow(flow, funcResult)
     }
   }
 }
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/DatasetManager.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/DatasetManager.scala
index 4affbe4637dba..456edca8d1e22 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/DatasetManager.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/DatasetManager.scala
@@ -303,6 +303,20 @@ object DatasetManager extends Logging {
       context.spark.sql(s"TRUNCATE TABLE ${table.identifier.quotedString}")
     }
 
+    if (isFullRefresh) {
+      // On full refresh, drop the AutoCDC auxiliary state associated with this table (if any) so
+      // that stale delete-tracking data and table properties are not carried forward into the new
+      // table generation. We unconditionally issue the DROP for every fully-refreshed target.
+
+      // Intentionally DROP and not TRUNCATE: the auxiliary table is an internal state store
+      // that is not part of the dataflow graph, so it does not participate in regular schema
+      // evolution like user tables do. On a full refresh we want a clean recreation against
+      // the new target schema rather than carrying forward the previous generation's layout.
+
+      val auxiliaryTableId = AutoCdcAuxiliaryTable.identifier(table.identifier)
+      context.spark.sql(s"DROP TABLE IF EXISTS ${auxiliaryTableId.quotedString}")
+    }
+
     // Alter the table if we need to
     existingTableOpt.foreach { existingTable =>
       val existingSchema = v2ColumnsToStructType(existingTable.columns())
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/Flow.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/Flow.scala
index e329308502f0d..f88b0cd3a1cbe 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/Flow.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/Flow.scala
@@ -19,12 +19,20 @@ package org.apache.spark.sql.pipelines.graph
 
 import scala.util.Try
 
+import org.apache.spark.SparkException
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{functions => F, AnalysisException, Column}
 import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.classic.DataFrame
-import org.apache.spark.sql.pipelines.AnalysisWarning
-import org.apache.spark.sql.pipelines.util.InputReadOptions
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.pipelines.autocdc.{
+  AutoCdcReservedNames,
+  CaseSensitivityLabels,
+  ChangeArgs,
+  ColumnSelection,
+  Scd1BatchProcessor,
+  ScdType
+}
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
 
 /**
  * Contains the catalog and database context information for query execution.
@@ -99,8 +107,7 @@ case class FlowFunctionResult(
     streamingInputs: Set[ResolvedInput],
     usedExternalInputs: Set[TableIdentifier],
     dataFrame: Try[DataFrame],
-    sqlConf: Map[String, String],
-    analysisWarnings: Seq[AnalysisWarning] = Nil) {
+    sqlConf: Map[String, String]) {
 
   /**
    * Returns the names of all of the [[Input]]s used when resolving this [[Flow]]. If the
@@ -121,7 +128,21 @@ case class FlowFunctionResult(
 }
 
 /** A [[Flow]] whose output schema and dependencies aren't known. */
-case class UnresolvedFlow(
+sealed trait UnresolvedFlow extends Flow {
+  /** Returns a copy of this flow with the given SQL confs overriding the existing ones. */
+  def withSqlConf(newSqlConf: Map[String, String]): UnresolvedFlow
+}
+
+/**
+ * An [[UnresolvedFlow]] whose execution-type has not yet been determined.
+ *
+ * In some cases, we know the execution-type for an [[UnresolvedFlow]] even before flow analysis
+ * and resolution. For example, an [[AutoCdcFlow]] is a special unresolved-but-typed flow; we
+ * know a flow will be an AutoCDC flow immediately on construction, because it has its own
+ * special registration API. Such flows are considered "typed flows", but there isn't any
+ * semantic reason yet to explicitly introduce a `TypedFlow` trait/class.
+ */
+case class UntypedFlow(
     identifier: TableIdentifier,
     destinationIdentifier: TableIdentifier,
     func: FlowFunction,
@@ -129,7 +150,33 @@ case class UnresolvedFlow(
     sqlConf: Map[String, String],
     override val once: Boolean,
     override val origin: QueryOrigin
-) extends Flow
+) extends UnresolvedFlow {
+  override def withSqlConf(newSqlConf: Map[String, String]): UntypedFlow =
+    copy(sqlConf = newSqlConf)
+}
+
+/**
+ * An unresolved but typed flow that applies a CDC event stream to a target table via MERGE.
+ *
+ * [[AutoCdcFlow]] is a typed flow because it is only supported for streaming, and not as a once
+ * flow. Therefore by definition it is a streaming-type flow.
+ *
+ * In the future, support for once-mode [[AutoCdcFlow]] may be added.
+ */
+case class AutoCdcFlow(
+    identifier: TableIdentifier,
+    destinationIdentifier: TableIdentifier,
+    func: FlowFunction,
+    queryContext: QueryContext,
+    override val origin: QueryOrigin,
+    changeArgs: ChangeArgs,
+    sqlConf: Map[String, String] = Map.empty
+) extends UnresolvedFlow {
+  override val once: Boolean = false
+
+  override def withSqlConf(newSqlConf: Map[String, String]): AutoCdcFlow =
+    copy(sqlConf = newSqlConf)
+}
 
 /**
  * A [[Flow]] whose flow function has been invoked, meaning either:
@@ -167,7 +214,8 @@ trait ResolvedFlow extends ResolutionCompletedFlow with Input {
 
   /** Returns the schema of the output of this [[Flow]]. */
   def schema: StructType = df.schema
-  override def load(readOptions: InputReadOptions): DataFrame = df
+  override def load(asStreaming: Boolean): DataFrame = df
+
   def inputs: Set[TableIdentifier] = funcResult.inputs
 }
 
@@ -194,3 +242,157 @@ class AppendOnceFlow(
 
   override val once = true
 }
+
+/**
+ * A resolved flow that applies a CDC event stream to a target table via MERGE, in accordance
+ * with the configured [[flow.changeArgs]].
+ */
+class AutoCdcMergeFlow(
+    val flow: AutoCdcFlow,
+    val funcResult: FlowFunctionResult
+) extends ResolvedFlow {
+  requireReservedPrefixAbsentInSourceColumns()
+
+  def changeArgs: ChangeArgs = flow.changeArgs
+
+  /** The user-selected projection of [[df.schema]] (i.e. before the SCD metadata column). */
+  private val userSelectedSchema: StructType = {
+    val selectedSchema = ColumnSelection.applyToSchema(
+      schemaName = "changeDataFeed",
+      schema = df.schema,
+      columnSelection = changeArgs.columnSelection,
+      caseSensitive = spark.sessionState.conf.caseSensitiveAnalysis
+    )
+    // AutoCDC flows require all key columns to be present in the user-selected source schema,
+    // so that they survive into the target table where SCD reconciliation needs them.
+    requireKeysPresentInSelectedSchema(selectedSchema)
+    selectedSchema
+  }
+
+  /** The DataType of the sequencing expression, derived once from the source change feed. */
+  private[graph] val sequencingType: DataType =
+    df.select(changeArgs.sequencing).schema.head.dataType
+
+  /**
+   * Returns the augmented output schema of this flow, which can differ from the schema of the
+   * source change-data-feed dataframe.
+   *
+   * The source dataframe's schema describes the incoming CDC events; the augmented schema here
+   * applies the user-specified [[ColumnSelection]] and appends the SCD-specific metadata
+   * columns that the AutoCDC MERGE engine projects onto the target table. Downstream
+   * dependencies in the pipeline see this augmented schema.
+   */
+  override val schema: StructType = changeArgs.storedAsScdType match {
+    case ScdType.Type1 =>
+      // SCD1 produces a target table with all the user-selected output columns and a projected
+      // CDC operational metadata column at the end.
+      StructType(
+        userSelectedSchema.fields :+ StructField(
+          Scd1BatchProcessor.cdcMetadataColName,
+          Scd1BatchProcessor.cdcMetadataColSchema(sequencingType),
+          nullable = false
+        )
+      )
+    case ScdType.Type2 =>
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_SCD2_NOT_SUPPORTED",
+        messageParameters = Map.empty
+      )
+  }
+
+  /**
+   * Returns an empty dataframe whose schema matches [[AutoCdcMergeFlow.schema]]. By construction,
+   * the returned dataframe will be a streaming dataframe.
+   *
+   * Today, [[AutoCdcMergeFlow.load]] is not actually ever called during graph analysis or
+   * execution. An AutoCdcMergeFlow can only be an input to a streaming table (not an MV or
+   * persisted/temp view), and streaming tables take a [[VirtualTableInput]] as input, not
+   * the producing [[Flow]] directly. [[VirtualTableInput]] overrides its own [[load]] to do
+   * schema inference on its input flows, rather than a transitive [[ResolvedFlow.load]].
+   *
+   * The implementation exists for API consistency and throws an internal error if invoked with
+   * `asStreaming = false`, or if the underlying source dataframe is not streaming, to surface
+   * a misuse loudly rather than silently producing a non-streaming dataframe.
+   */
+  override def load(asStreaming: Boolean): DataFrame = {
+    if (!asStreaming) {
+      throw SparkException.internalError(
+        "Attempted to load AutoCDC flow as a batch flow. AutoCDC flows are strictly streaming " +
+        "flows, and must be loaded as such."
+      )
+    }
+    if (!df.isStreaming) {
+      throw SparkException.internalError(
+        "AutoCDC source dataframe is not streaming. AutoCDC flows are strictly streaming flows, " +
+        "and must be backed by a streaming source."
+      )
+    }
+    changeArgs.storedAsScdType match {
+      case ScdType.Type1 =>
+        val userSelectedCols: Seq[Column] = userSelectedSchema.fieldNames.toSeq.map(F.col)
+        val emptyCdcMetadataCol: Column = Scd1BatchProcessor.constructCdcMetadataCol(
+          deleteSequence = F.lit(null),
+          upsertSequence = F.lit(null),
+          sequencingType = sequencingType
+        ).as(Scd1BatchProcessor.cdcMetadataColName)
+
+        df.select(userSelectedCols :+ emptyCdcMetadataCol: _*)
+      case ScdType.Type2 =>
+        throw new AnalysisException(
+          errorClass = "AUTOCDC_SCD2_NOT_SUPPORTED",
+          messageParameters = Map.empty
+        )
+    }
+  }
+
+  /**
+   * Validate that the resolved source dataframe for the AutoCDC flow does not contain any column
+   * names that use the reserved Spark AutoCDC prefix.
+   */
+  private def requireReservedPrefixAbsentInSourceColumns(): Unit = {
+    val resolver = spark.sessionState.conf.resolver
+    val reservedPrefix = AutoCdcReservedNames.prefix
+
+    def nameContainsReservedPrefix(name: String): Boolean = {
+      name.length >= reservedPrefix.length && resolver(
+        name.substring(0, reservedPrefix.length),
+        reservedPrefix
+      )
+    }
+
+    df.schema.fieldNames.find(nameContainsReservedPrefix).foreach { conflictingColumnName =>
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT",
+        messageParameters = Map(
+          "caseSensitivity" -> CaseSensitivityLabels.of(
+            spark.sessionState.conf.caseSensitiveAnalysis
+          ),
+          "columnName" -> conflictingColumnName,
+          "schemaName" -> "changeDataFeed",
+          "reservedColumnNamePrefix" -> reservedPrefix
+        )
+      )
+    }
+  }
+
+  /**
+   * Validate all keys specified in changeArgs are actually present in the user-selected schema.
+   */
+  private def requireKeysPresentInSelectedSchema(selectedSchema: StructType): Unit = {
+    val resolver = spark.sessionState.conf.resolver
+
+    changeArgs.keys
+      .find(key => !selectedSchema.fieldNames.exists(name => resolver(name, key.name)))
+      .foreach { missingKey =>
+        throw new AnalysisException(
+          errorClass = "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA",
+          messageParameters = Map(
+            "caseSensitivity" -> CaseSensitivityLabels.of(
+              spark.sessionState.conf.caseSensitiveAnalysis
+            ),
+            "keyColumnName" -> missingKey.name
+          )
+        )
+      }
+  }
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysis.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysis.scala
index 1a00a6339c4ba..7e174f2b31078 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysis.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysis.scala
@@ -23,10 +23,8 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CTESubstitution, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
-import org.apache.spark.sql.classic.{DataFrame, Dataset, DataStreamReader, SparkSession}
-import org.apache.spark.sql.pipelines.AnalysisWarning
+import org.apache.spark.sql.classic.{DataFrame, DataFrameReader, Dataset, DataStreamReader, SparkSession}
 import org.apache.spark.sql.pipelines.graph.GraphIdentifierManager.{ExternalDatasetIdentifier, InternalDatasetIdentifier}
-import org.apache.spark.sql.pipelines.util.{BatchReadOptions, InputReadOptions, StreamingReadOptions}
 
 
 object FlowAnalysis {
@@ -64,8 +62,7 @@ object FlowAnalysis {
         streamingInputs = ctx.streamingInputs.toSet,
         usedExternalInputs = ctx.externalInputs.toSet,
         dataFrame = df,
-        sqlConf = confs,
-        analysisWarnings = ctx.analysisWarnings.toList
+        sqlConf = confs
       )
     }
   }
@@ -112,8 +109,7 @@ object FlowAnalysis {
           val resolved = readStreamInput(
             context,
             name = IdentifierHelper.toQuotedString(u.multipartIdentifier),
-            spark.readStream,
-            streamingReadOptions = StreamingReadOptions()
+            streamReader = spark.readStream.options(u.options)
           ).queryExecution.analyzed
           // Spark Connect requires the PLAN_ID_TAG to be propagated to the resolved plan
           // to allow correct analysis of the parent plan that contains this subquery
@@ -124,7 +120,7 @@ object FlowAnalysis {
           val resolved = readBatchInput(
             context,
             name = IdentifierHelper.toQuotedString(u.multipartIdentifier),
-            batchReadOptions = BatchReadOptions()
+            batchReader = spark.read.options(u.options)
           ).queryExecution.analyzed
           // Spark Connect requires the PLAN_ID_TAG to be propagated to the resolved plan
           // to allow correct analysis of the parent plan that contains this subquery
@@ -143,23 +139,25 @@ object FlowAnalysis {
    * All the public APIs that read from a dataset should call this function to read the dataset.
    *
    * @param name the name of the Dataset to be read.
-   * @param batchReadOptions Options for this batch read
+   * @param batchReader the batch dataframe reader, possibly with options, to execute the read
+   *                    with.
    * @return batch DataFrame that represents data from the specified Dataset.
    */
   final private def readBatchInput(
       context: FlowAnalysisContext,
       name: String,
-      batchReadOptions: BatchReadOptions
+      batchReader: DataFrameReader
   ): DataFrame = {
     GraphIdentifierManager.parseAndQualifyInputIdentifier(context, name) match {
       case inputIdentifier: InternalDatasetIdentifier =>
-        readGraphInput(context, inputIdentifier, batchReadOptions)
+        readGraphInput(context, inputIdentifier, isStreamingRead = false)
 
       case inputIdentifier: ExternalDatasetIdentifier =>
         readExternalBatchInput(
           context,
           inputIdentifier = inputIdentifier,
-          name = name
+          name = name,
+          batchReader = batchReader
         )
     }
   }
@@ -173,21 +171,19 @@ object FlowAnalysis {
    *
    * @param name the name of the Dataset to be read.
    * @param streamReader The [[DataStreamReader]] that may hold read options specified by the user.
-   * @param streamingReadOptions Options for this streaming read.
    * @return streaming DataFrame that represents data from the specified Dataset.
    */
   final private def readStreamInput(
       context: FlowAnalysisContext,
       name: String,
-      streamReader: DataStreamReader,
-      streamingReadOptions: StreamingReadOptions
+      streamReader: DataStreamReader
   ): DataFrame = {
     GraphIdentifierManager.parseAndQualifyInputIdentifier(context, name) match {
       case inputIdentifier: InternalDatasetIdentifier =>
         readGraphInput(
           context,
           inputIdentifier,
-          streamingReadOptions
+          isStreamingRead = true
         )
 
       case inputIdentifier: ExternalDatasetIdentifier =>
@@ -204,13 +200,13 @@ object FlowAnalysis {
    * Internal helper to reference dataset defined in the same [[DataflowGraph]].
    *
    * @param inputIdentifier The identifier of the Dataset to be read.
-   * @param readOptions Options for this read (may be either streaming or batch options)
+   * @param isStreamingRead Whether this is a streaming read or batch read.
    * @return streaming or batch DataFrame that represents data from the specified Dataset.
    */
   final private def readGraphInput(
       ctx: FlowAnalysisContext,
       inputIdentifier: InternalDatasetIdentifier,
-      readOptions: InputReadOptions
+      isStreamingRead: Boolean
   ): DataFrame = {
     val datasetIdentifier = inputIdentifier.identifier
 
@@ -227,7 +223,27 @@ object FlowAnalysis {
       ctx.availableInput(datasetIdentifier)
     }
 
-    val inputDF = input.load(readOptions)
+    val inputDF = input.load(asStreaming = isStreamingRead)
+
+    // Validate that the loaded DataFrame's streaming-ness matches the requested read mode. Tables
+    // pass through trivially as their [[VirtualTableInput.load]] honors `asStreaming` by
+    // construction. The check only ever fires for flows.
+    val incompatibleViewReadCheck =
+      ctx.spark.conf.get("pipelines.incompatibleViewCheck.enabled", "true").toBoolean
+
+    if (incompatibleViewReadCheck && isStreamingRead && !inputDF.isStreaming) {
+      throw new AnalysisException(
+        "INCOMPATIBLE_BATCH_VIEW_READ",
+        Map("datasetIdentifier" -> datasetIdentifier.toString)
+      )
+    }
+    if (incompatibleViewReadCheck && !isStreamingRead && inputDF.isStreaming) {
+      throw new AnalysisException(
+        "INCOMPATIBLE_STREAMING_VIEW_READ",
+        Map("datasetIdentifier" -> datasetIdentifier.toString)
+      )
+    }
+
     input match {
       // If the referenced input is a [[Flow]], because the query plans will be fused
       // together, we also need to fuse their confs.
@@ -235,9 +251,6 @@ object FlowAnalysis {
       case _ =>
     }
 
-    val incompatibleViewReadCheck =
-      ctx.spark.conf.get("pipelines.incompatibleViewCheck.enabled", "true").toBoolean
-
     // Wrap the DF in an alias so that columns in the DF can be referenced with
     // the following in the query:
     // - <catalog>.<schema>.<dataset>.<column>
@@ -248,30 +261,10 @@ object FlowAnalysis {
       qualifier = Seq(datasetIdentifier.catalog, datasetIdentifier.database).flatten
     )
 
-    readOptions match {
-      case sro: StreamingReadOptions =>
-        if (!inputDF.isStreaming && incompatibleViewReadCheck) {
-          throw new AnalysisException(
-            "INCOMPATIBLE_BATCH_VIEW_READ",
-            Map("datasetIdentifier" -> datasetIdentifier.toString)
-          )
-        }
-
-        if (sro.droppedUserOptions.nonEmpty) {
-          ctx.analysisWarnings += AnalysisWarning.StreamingReaderOptionsDropped(
-            sourceName = datasetIdentifier.unquotedString,
-            droppedOptions = sro.droppedUserOptions.keys.toSeq
-          )
-        }
-        ctx.streamingInputs += ResolvedInput(input, aliasIdentifier)
-      case _ =>
-        if (inputDF.isStreaming && incompatibleViewReadCheck) {
-          throw new AnalysisException(
-            "INCOMPATIBLE_STREAMING_VIEW_READ",
-            Map("datasetIdentifier" -> datasetIdentifier.toString)
-          )
-        }
-        ctx.batchInputs += ResolvedInput(input, aliasIdentifier)
+    if (isStreamingRead) {
+      ctx.streamingInputs += ResolvedInput(input, aliasIdentifier)
+    } else {
+      ctx.batchInputs += ResolvedInput(input, aliasIdentifier)
     }
     Dataset.ofRows(
       ctx.spark,
@@ -289,11 +282,11 @@ object FlowAnalysis {
   final private def readExternalBatchInput(
       context: FlowAnalysisContext,
       inputIdentifier: ExternalDatasetIdentifier,
-      name: String): DataFrame = {
+      name: String,
+      batchReader: DataFrameReader): DataFrame = {
 
-    val spark = context.spark
     context.externalInputs += inputIdentifier.identifier
-    spark.read.table(inputIdentifier.identifier.quotedString)
+    batchReader.table(inputIdentifier.identifier.quotedString)
   }
 
   /**
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysisContext.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysisContext.scala
index 1139946df59ac..e5f7cddc4d327 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysisContext.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysisContext.scala
@@ -18,11 +18,9 @@
 package org.apache.spark.sql.pipelines.graph
 
 import scala.collection.mutable
-import scala.collection.mutable.ListBuffer
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.classic.SparkSession
-import org.apache.spark.sql.pipelines.AnalysisWarning
 
 /**
  * A context used when evaluating a `Flow`'s query into a concrete DataFrame.
@@ -44,7 +42,6 @@ private[pipelines] case class FlowAnalysisContext(
     streamingInputs: mutable.HashSet[ResolvedInput] = mutable.HashSet.empty,
     requestedInputs: mutable.HashSet[TableIdentifier] = mutable.HashSet.empty,
     shouldLowerCaseNames: Boolean = false,
-    analysisWarnings: mutable.Buffer[AnalysisWarning] = new ListBuffer[AnalysisWarning],
     spark: SparkSession,
     externalInputs: mutable.HashSet[TableIdentifier] = mutable.HashSet.empty
 ) {
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowExecution.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowExecution.scala
index 13a5621947d57..c4cd358344ca1 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowExecution.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowExecution.scala
@@ -20,15 +20,31 @@ package org.apache.spark.sql.pipelines.graph
 import java.util.concurrent.ThreadPoolExecutor
 import java.util.concurrent.atomic.AtomicBoolean
 
+import scala.collection.mutable
 import scala.concurrent.{ExecutionContext, Future}
+import scala.jdk.CollectionConverters._
 import scala.util.control.NonFatal
 
+import org.json4s.JsonAST.{JArray, JString}
+import org.json4s.jackson.JsonMethods.{compact, parse}
+
+import org.apache.spark.SparkException
 import org.apache.spark.internal.{Logging, LogKeys}
+import org.apache.spark.sql.{AnalysisException, Dataset, Row}
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.classic.ClassicConversions._
 import org.apache.spark.sql.classic.SparkSession
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsRowLevelOperations, Table => CatalogTable, TableCatalog, TableInfo}
+import org.apache.spark.sql.pipelines.autocdc.{
+  AutoCdcReservedNames,
+  ChangeArgs,
+  Scd1BatchProcessor,
+  Scd1ForeachBatchHandler
+}
 import org.apache.spark.sql.pipelines.graph.QueryOrigin.ExceptionHelpers
-import org.apache.spark.sql.pipelines.util.SparkSessionUtils
+import org.apache.spark.sql.pipelines.util.{PipelinesCatalogUtils, SparkSessionUtils}
 import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery, Trigger}
+import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.util.ThreadUtils
 
 /**
@@ -301,3 +317,382 @@ class SinkWrite(
       .start()
   }
 }
+
+object AutoCdcAuxiliaryTable {
+  /**
+   * Helper for deriving the auxiliary AutoCDC catalog table identifier from a target table. If a
+   * table exists with a name matching the name derived here, it is assumed to be an AutoCDC
+   * auxiliary table that should be managed by the pipeline.
+   */
+  def identifier(destination: TableIdentifier): TableIdentifier = TableIdentifier(
+    table = s"${AutoCdcReservedNames.prefix}aux_state_${destination.table}",
+    database = destination.database,
+    catalog = destination.catalog
+  )
+
+  /**
+   * Reserved table property key set on the auxiliary table to record which SCD strategy it
+   * serves.
+   */
+  val scdTypePropertyKey: String = s"${PipelinesTableProperties.pipelinesPrefix}autocdc.scdType"
+
+  /**
+   * Table property recording the auxiliary table's unquoted AutoCDC key column names as a JSON
+   * string array (e.g. `["id","region"]`). Written once when the auxiliary table is created and is
+   * considered immutable; full-refresh is the only way to change it.
+   */
+  val keyColumnNamesProperty: String =
+    s"${PipelinesTableProperties.pipelinesPrefix}autocdc.keyColumnNames"
+
+  /**
+   * Serialize key column names to the JSON form stored at [[keyColumnNamesProperty]].
+   * Round-trips an empty list as `[]`; callers are expected to enforce a non-empty key set
+   * upstream.
+   */
+  def serializeKeyColumnNames(names: Seq[String]): String = {
+    compact(JArray(names.map(JString(_)).toList))
+  }
+
+  /**
+   * Parse a [[keyColumnNamesProperty]] value. `None` if it is not a JSON array of strings.
+   * Round-trips an empty list as `[]`; callers are expected to enforce a non-empty key set
+   * upstream.
+   */
+  def parseKeyColumnNames(raw: String): Option[Seq[String]] = {
+    val parsed = try Some(parse(raw)) catch { case NonFatal(_) => None }
+    parsed.flatMap {
+      case JArray(elems) =>
+        val names = elems.collect { case JString(s) => s }
+        if (names.size == elems.size) Some(names) else None
+      case _ => None
+    }
+  }
+}
+
+/**
+ * Base trait for AutoCDC merge-based write flows.
+ *
+ * Today, this trait and its children manage auxiliary table creation and validation across
+ * pipeline executions. Eventually we should evolve DatasetManager to be aware of the concept of
+ * auxiliary tables, and streamline creation/validation there.
+ */
+trait AutoCdcMergeWriteBase {
+  /** The spark session the AutoCDC flow is going to be planned in. */
+  protected def spark: SparkSession
+
+  /** The destination (target) table entity the AutoCDC flow will be writing to. */
+  protected def destination: Table
+
+  /** The AutoCDC flow's identifier, used as `flowName` in error messages emitted by this mixin. */
+  protected def identifier: TableIdentifier
+
+  /** The AutoCDC flow's [[ChangeArgs]] (keys, sequencing, columnSelection, ...). */
+  protected def changeArgs: ChangeArgs
+
+  /** Full schema of the auxiliary table for this SCD type. */
+  protected def auxiliaryTableSchema: StructType
+
+  /**
+   * Create the auxiliary table for [[destination]] if it does not already exist and return its
+   * [[TableIdentifier]].
+   *
+   * When the aux table already exists, its schema and properties are left untouched. For SCD1
+   * the keys must be invariant across executions and the CDC metadata is always present, so
+   * this is correct; drift validation reads the recorded `keyColumnNamesProperty` to enforce
+   * the invariant before this method is called.
+   */
+  protected def createAuxiliaryTableIfNotExists(spark: SparkSession): TableIdentifier = {
+    val auxIdent = AutoCdcAuxiliaryTable.identifier(destination.identifier)
+    val (catalog, v2Identifier) = PipelinesCatalogUtils.resolveTableCatalog(spark, auxIdent)
+
+    if (!catalog.tableExists(v2Identifier)) {
+      val properties = mutable.Map.empty[String, String]
+
+      // Inherit the target's format so MERGE semantics line up. When unspecified, omit the
+      // provider so the catalog falls back to its default.
+      destination.format.foreach { fmt => properties(TableCatalog.PROP_PROVIDER) = fmt }
+
+      // Record which SCD strategy this auxiliary table serves so downstream readers can
+      // identify it without having to inspect the schema.
+      properties(AutoCdcAuxiliaryTable.scdTypePropertyKey) = changeArgs.storedAsScdType.label
+
+      // Persist the AutoCDC key column names as a JSON list on first creation. The value
+      // is stored verbatim by the catalog.
+      properties(AutoCdcAuxiliaryTable.keyColumnNamesProperty) =
+        AutoCdcAuxiliaryTable.serializeKeyColumnNames(auxiliaryKeyColumnNames)
+
+      // Table creation is not atomic with the table exists check, and [[createTable]] will fail
+      // with TableAlreadyExistsException if some asynchronous process creates the table between
+      // the [[tableExists]] check and [[createTable]]. This is both rare (we don't support
+      // multi-AutoCDC-flow targets so there are no race conditions within a single pipeline) and
+      // acceptable - users can cleanly retry the failed flow when this happens. SQL offers an
+      // atomic CREATE IF NOT EXISTS, but would require special casing of the table properties
+      // in DDL and we would lose compile-time syntax and type safety.
+      catalog.createTable(
+        v2Identifier,
+        new TableInfo.Builder()
+          .withColumns(CatalogV2Util.structTypeToV2Columns(auxiliaryTableSchema))
+          .withProperties(properties.asJava)
+          .build()
+      )
+    }
+    auxIdent
+  }
+
+  /**
+   * Resolves each AutoCDC key in `changeArgs.keys` to its [[StructField]] in
+   * [[auxiliaryTableSchema]], preserving `changeArgs.keys` declaration order. This is the
+   * expected (flow-declared) side of drift validation, distinct from the keys recorded on an
+   * existing auxiliary table.
+   *
+   * [[AutoCdcMergeFlow]] should have validated that all `changeArgs.keys` exist in the deduced
+   * aux/target schemas by now, so a missing key is an internal error rather than a user-facing
+   * condition.
+   */
+  private lazy val expectedAuxiliaryKeyFields: Seq[StructField] = {
+    val resolver = spark.sessionState.conf.resolver
+    changeArgs.keys.map { key =>
+      auxiliaryTableSchema.fields
+        .find(field => resolver(field.name, key.name))
+        .getOrElse(
+          throw SparkException.internalError(
+            s"AutoCDC key column '${key.name}' is missing from the auxiliary table schema " +
+            s"for flow ${identifier.unquotedString} writing to target " +
+            s"${destination.identifier.quotedString}."
+          )
+        )
+    }
+  }
+
+  /**
+   * Returns the resolved AutoCDC key column names as they appear in the auxiliary schema, in
+   * `changeArgs.keys` declaration order.
+   */
+  private lazy val auxiliaryKeyColumnNames: Seq[String] = expectedAuxiliaryKeyFields.map(_.name)
+
+  /**
+   * Validate that the target table's underlying connector implements
+   * [[SupportsRowLevelOperations]], which is the V2 connector contract for MERGE/UPDATE/DELETE
+   * with rewrite - all operations that the AutoCDC transformation executes.
+   */
+  protected def requireDestinationSupportsRowLevelOps(): Unit = {
+    val (catalog, v2Identifier) =
+      PipelinesCatalogUtils.resolveTableCatalog(spark, destination.identifier)
+    val destinationTable = catalog.loadTable(v2Identifier)
+
+    if (!destinationTable.isInstanceOf[SupportsRowLevelOperations]) {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_TARGET_DOES_NOT_SUPPORT_MERGE",
+        messageParameters = Map(
+          "tableName" -> destination.identifier.quotedString,
+          "format" -> destination.format.orElse(
+              Option(
+                destinationTable.properties.get(TableCatalog.PROP_PROVIDER)
+              )
+            )
+            .getOrElse("<unknown>")
+        )
+      )
+    }
+  }
+
+  /**
+   * If the auxiliary table for this flow's destination already exists, validate that the
+   * AutoCDC keys the flow expects line up with the keys recorded in the auxiliary
+   * table. On a fresh pipeline (or after a full refresh dropped the auxiliary), the
+   * auxiliary is absent and there's nothing to drift from, so this is a no-op.
+   */
+  protected def validateNoAutoCdcKeyDriftIfAuxTableExists(): Unit = {
+    val auxIdent = AutoCdcAuxiliaryTable.identifier(destination.identifier)
+    val (catalog, v2Identifier) = PipelinesCatalogUtils.resolveTableCatalog(spark, auxIdent)
+    if (catalog.tableExists(v2Identifier)) {
+      validateNoAutoCdcKeyDrift(catalog.loadTable(v2Identifier), auxIdent)
+    }
+  }
+
+  /**
+   * Validate that the AutoCDC key columns the flow expects match the keys recorded in the
+   * existing auxiliary table at [[auxIdent]] as a set: same arity, same set of names (per the
+   * session resolver), same per-name `dataType`s.
+   */
+  private def validateNoAutoCdcKeyDrift(
+      existingAuxTable: CatalogTable,
+      auxIdent: TableIdentifier): Unit = {
+    val resolver = spark.sessionState.conf.resolver
+    val existingAuxSchema = CatalogV2Util.v2ColumnsToStructType(existingAuxTable.columns())
+
+    // Resolve the flow-declared (expected) keys from [[auxiliaryTableSchema]]. We deliberately
+    // do not look them up in [[existingAuxSchema]] - that's the recorded side, and conflating
+    // the two sides would mask drift. See [[expectedAuxiliaryKeyFields]].
+    val expectedKeyFields: Seq[StructField] = expectedAuxiliaryKeyFields
+    val recordedKeyNames = parseRecordedKeyColumnNames(existingAuxTable, auxIdent)
+    val recordedKeyFields: Seq[StructField] = recordedKeyNames.map { name =>
+      existingAuxSchema.fields
+        .find(field => resolver(field.name, name))
+        .getOrElse(
+          // Either an implementation bug or, more likely, the user has corrupted the auxiliary
+          // table schema (e.g. dropped the key column). The remedy is full-refresh in either
+          // case.
+          throw new AnalysisException(
+            errorClass = "AUTOCDC_INVALID_STATE.AUXILIARY_TABLE_KEY_COLUMN_MISSING",
+            messageParameters = Map(
+              "flowName" -> identifier.unquotedString,
+              "auxTableName" -> auxIdent.unquotedString,
+              "keyColumnName" -> name,
+              "propertyName" -> AutoCdcAuxiliaryTable.keyColumnNamesProperty
+            )
+          )
+        )
+    }
+
+    val drifted =
+      // Arity drift (added or dropped keys).
+      recordedKeyFields.length != expectedKeyFields.length ||
+      // Name or dataType drift: every expected key must have a same-name (resolver-aware)
+      // recorded counterpart with an equivalent dataType. Columns changing nullability and
+      // metadata in the schema are intentionally tolerated, although null key values during
+      // microbatch execution will be invalidated regardless.
+      expectedKeyFields.exists { expected =>
+        recordedKeyFields.find(rf => resolver(rf.name, expected.name)) match {
+          case None => true
+          case Some(recorded) => !recorded.dataType.sameType(expected.dataType)
+        }
+      }
+
+    if (drifted) {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+        messageParameters = Map(
+          "flowName" -> identifier.unquotedString,
+          "auxTableName" -> auxIdent.unquotedString,
+          "expectedKeySchema" -> StructType(expectedKeyFields).toDDL,
+          "recordedKeySchema" -> StructType(recordedKeyFields).toDDL
+        )
+      )
+    }
+  }
+
+  /**
+   * Read the [[AutoCdcAuxiliaryTable.keyColumnNamesProperty]] off an existing auxiliary table
+   * and parse it into the ordered list of recorded AutoCDC key column names.
+   */
+  private def parseRecordedKeyColumnNames(
+      existingAuxTable: CatalogTable,
+      auxIdent: TableIdentifier): Seq[String] = {
+    val rawKeyColumnNamesStr = Option(
+      existingAuxTable.properties().get(AutoCdcAuxiliaryTable.keyColumnNamesProperty)
+    ).getOrElse {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_INVALID_STATE.AUXILIARY_TABLE_PROPERTY_MISSING",
+        messageParameters = Map(
+          "flowName" -> identifier.unquotedString,
+          "auxTableName" -> auxIdent.unquotedString,
+          "propertyName" -> AutoCdcAuxiliaryTable.keyColumnNamesProperty
+        )
+      )
+    }
+    AutoCdcAuxiliaryTable.parseKeyColumnNames(rawKeyColumnNamesStr).getOrElse {
+      throw new AnalysisException(
+        errorClass = "AUTOCDC_INVALID_STATE.AUXILIARY_TABLE_PROPERTY_MALFORMED",
+        messageParameters = Map(
+          "flowName" -> identifier.unquotedString,
+          "auxTableName" -> auxIdent.unquotedString,
+          "propertyName" -> AutoCdcAuxiliaryTable.keyColumnNamesProperty,
+          "rawValue" -> rawKeyColumnNamesStr
+        )
+      )
+    }
+  }
+}
+
+/**
+ * A [[StreamingFlowExecution]] that applies a CDC event stream to a target [[Table]] via
+ * SCD Type 1 MERGE semantics.
+ */
+class Scd1MergeStreamingWrite(
+    val identifier: TableIdentifier,
+    val flow: AutoCdcMergeFlow,
+    val graph: DataflowGraph,
+    val updateContext: PipelineUpdateContext,
+    val checkpointPath: String,
+    val trigger: Trigger,
+    val destination: Table,
+    val sqlConf: Map[String, String]
+) extends StreamingFlowExecution with AutoCdcMergeWriteBase {
+
+  requireDestinationSupportsRowLevelOps()
+  validateNoAutoCdcKeyDriftIfAuxTableExists()
+
+  override def getOrigin: QueryOrigin = flow.origin
+
+  override protected def changeArgs: ChangeArgs = flow.changeArgs
+
+  override def startStream(): StreamingQuery = {
+    val sourceChangeDataFeed = graph.reanalyzeFlow(flow).df
+
+    // The auxiliary table is created here (at flow execution) rather than during flow resolution
+    // or dataset materialization for two reasons:
+    //   1. It is an internal state store: we deliberately keep it out of the graph registration
+    //      context's table set so that it is invisible to other flows and the [[DatasetManager]]
+    //      will never materialize it.
+    //   2. Its format must match the target table's, which only exists after the target is
+    //      materialized. Flow resolution must also stay side-effect free (e.g. for dry runs).
+    val auxiliaryTableIdentifier = createAuxiliaryTableIfNotExists(spark = updateContext.spark)
+
+    val foreachBatchHandler = Scd1ForeachBatchHandler(
+      batchProcessor = Scd1BatchProcessor(
+        changeArgs = flow.changeArgs,
+        resolvedSequencingType = flow.sequencingType
+      ),
+      auxiliaryTableIdentifier = auxiliaryTableIdentifier,
+      targetTableIdentifier = destination.identifier
+    )
+
+    sourceChangeDataFeed.writeStream
+      .queryName(displayName)
+      .option("checkpointLocation", checkpointPath)
+      .trigger(trigger)
+      .foreachBatch((batch: Dataset[Row], batchId: Long) => {
+        foreachBatchHandler.execute(batch, batchId)
+      })
+      .start()
+  }
+
+  override protected lazy val auxiliaryTableSchema: StructType =
+    // SCD1's auxiliary table is just keys + the CDC metadata struct; no user data columns. Keys
+    // come first, in `changeArgs.keys` declaration order, to anchor the per-key sequence
+    // watermark used to gate out-of-order events.
+    StructType(autoCdcKeyFields :+ cdcMetadataField)
+
+  /**
+   * AutoCDC key columns resolved out of the flow's augmented schema, in
+   * `changeArgs.keys` declaration order. Keys are guaranteed to be present in the schema
+   * because [[AutoCdcMergeFlow.schema]] validates that.
+   */
+  private lazy val autoCdcKeyFields: Seq[StructField] = {
+    val resolver = updateContext.spark.sessionState.conf.resolver
+    val targetTableSchema = flow.schema
+    flow.changeArgs.keys.map { key =>
+      targetTableSchema.fields
+        .find(field => resolver(field.name, key.name))
+        .getOrElse(
+          throw SparkException.internalError(
+            s"Key column '${key.name}' was not found in the AutoCDC flow's selected schema."
+          )
+        )
+    }
+  }
+
+  /** CDC metadata field resolved out of the flow's augmented schema. */
+  private lazy val cdcMetadataField: StructField = {
+    val resolver = updateContext.spark.sessionState.conf.resolver
+    flow.schema.fields
+      .find(field => resolver(field.name, Scd1BatchProcessor.cdcMetadataColName))
+      .getOrElse(
+        throw SparkException.internalError(
+          s"CDC metadata column '${Scd1BatchProcessor.cdcMetadataColName}' was not found in the " +
+          s"AutoCDC flow's target table schema."
+        )
+      )
+  }
+}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowPlanner.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowPlanner.scala
index 29e2da4a5e13f..8251780524a2d 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowPlanner.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowPlanner.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.pipelines.graph
 
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.pipelines.autocdc.ScdType
 import org.apache.spark.sql.streaming.Trigger
 
 /**
@@ -73,10 +75,30 @@ class FlowPlanner(
               trigger = triggerFor(sf),
               checkpointPath = flowMetadata.latestCheckpointLocation
             )
-          case _ =>
-            throw new UnsupportedOperationException(
-              s"Unsupported destination type: ${output.getClass.getSimpleName} for " +
-              s"streaming flow ${sf.identifier} (${flow.destinationIdentifier})"
+          case _ => unsupportedDestinationType(sf, output)
+        }
+      case acmf: AutoCdcMergeFlow =>
+        acmf.changeArgs.storedAsScdType match {
+          case ScdType.Type1 =>
+            val flowMetadata = FlowSystemMetadata(updateContext, acmf, graph)
+            output match {
+              case o: Table =>
+                new Scd1MergeStreamingWrite(
+                  identifier = acmf.identifier,
+                  flow = acmf,
+                  graph = graph,
+                  updateContext = updateContext,
+                  checkpointPath = flowMetadata.latestCheckpointLocation,
+                  trigger = triggerFor(acmf),
+                  destination = o,
+                  sqlConf = acmf.sqlConf
+                )
+              case _ => unsupportedDestinationType(acmf, output)
+            }
+          case ScdType.Type2 =>
+            throw new AnalysisException(
+              errorClass = "AUTOCDC_SCD2_NOT_SUPPORTED",
+              messageParameters = Map.empty
             )
         }
       case _ =>
@@ -85,4 +107,11 @@ class FlowPlanner(
         )
     }
   }
+
+  private def unsupportedDestinationType(flow: ResolvedFlow, output: Output): Nothing = {
+    throw new UnsupportedOperationException(
+      s"Unsupported destination type: ${output.getClass.getSimpleName} for " +
+      s"flow ${flow.identifier} writing to ${flow.destinationIdentifier}"
+    )
+  }
 }
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphRegistrationContext.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphRegistrationContext.scala
index dadda0561b19f..970fdb4b70e94 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphRegistrationContext.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphRegistrationContext.scala
@@ -59,7 +59,7 @@ class GraphRegistrationContext(
   }
 
   def registerFlow(flowDef: UnresolvedFlow): Unit = {
-    flows += flowDef.copy(sqlConf = defaultSqlConf ++ flowDef.sqlConf)
+    flows += flowDef.withSqlConf(defaultSqlConf ++ flowDef.sqlConf)
   }
 
   private def isEmpty: Boolean = {
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphValidations.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphValidations.scala
index ade6ce0bad3c3..d56b95b5830b0 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphValidations.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphValidations.scala
@@ -34,6 +34,25 @@ trait GraphValidations extends Logging {
    */
   protected[pipelines] def validateMultiQueryTables(): Map[TableIdentifier, Seq[Flow]] = {
     val multiQueryTables = flowsTo.filter(_._2.size > 1)
+
+    // A multiflow table may not have an AutoCDC flow; AutoCDC targets must have exactly one
+    // input flow.
+    multiQueryTables
+      .find { case (_, flows) => flows.exists(isAutoCdcFlow) }
+      .foreach {
+        case (dest, flows) =>
+          throw new AnalysisException(
+            "AUTOCDC_MULTIPLE_FLOWS_TO_TARGET",
+            Map(
+              "tableName" -> dest.unquotedString,
+              "flows" -> flows
+                .map(_.displayName)
+                .sorted
+                .mkString(", ")
+            )
+          )
+      }
+
     // Non-streaming tables do not support multiflow.
     multiQueryTables
       .find {
@@ -58,6 +77,12 @@ trait GraphValidations extends Logging {
     multiQueryTables
   }
 
+  /** Returns true iff the given flow is an [[AutoCdcFlow]] (resolved or not). */
+  private def isAutoCdcFlow(f: Flow): Boolean = f match {
+    case _: AutoCdcFlow | _: AutoCdcMergeFlow => true
+    case _ => false
+  }
+
   /**
    * Validate that each resolved flow is correctly either a streaming flow or non-streaming flow,
    * depending on the flow type (ex. once flow vs non-once flow) and the dataset type the flow
@@ -126,8 +151,21 @@ trait GraphValidations extends Logging {
                 )
               }
             case _: TemporaryView =>
-              // Temporary views' flows are allowed to be either streaming or batch, so no
-              // validation needs to be done for them
+              // Temporary views' flows are generally allowed to be either streaming or batch.
+              resolvedFlow match {
+                case _: AutoCdcMergeFlow =>
+                  // The exception is AutoCDC flows, which require a streaming-table sink to
+                  // immediately execute MERGE against.
+                  throw new AnalysisException(
+                    errorClass =
+                      "INVALID_FLOW_QUERY_TYPE.AUTOCDC_RELATION_FOR_TEMPORARY_VIEW",
+                    messageParameters = Map(
+                      "flowIdentifier" -> resolvedFlow.identifier.quotedString,
+                      "viewIdentifier" -> destTableIdentifier.quotedString
+                    )
+                  )
+                case _ => // OK: any other flow is permitted to target a temporary view.
+              }
           }
         }
       }
@@ -215,7 +253,7 @@ trait GraphValidations extends Logging {
   }
 
   protected def validateUserSpecifiedSchemas(): Unit = {
-    flows.flatMap(f => table.get(f.identifier)).foreach { t: TableInput =>
+    flows.flatMap(f => table.get(f.identifier)).foreach { t: TableElement =>
       // The output inferred schema of a table is the declared schema merged with the
       // schema of all incoming flows. This must be equivalent to the declared schema.
       val inferredSchema = SchemaInferenceUtils
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/PipelinesErrors.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/PipelinesErrors.scala
index 7116f5fbcf068..b194e9c235fba 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/PipelinesErrors.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/PipelinesErrors.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.pipelines.graph
 
 import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark.SparkException
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.TableIdentifier
@@ -35,19 +34,6 @@ case class UnresolvedDatasetException(identifier: TableIdentifier)
       s"pipeline but could not be resolved."
     )
 
-/**
- * Exception raised when a flow fails to read from a table defined within the pipeline
- *
- * @param name The name of the table
- * @param cause The cause of the failure
- */
-case class LoadTableException(name: String, cause: Option[Throwable])
-    extends SparkException(
-      errorClass = "INTERNAL_ERROR",
-      messageParameters = Map("message" -> s"Failed to load table '$name'"),
-      cause = cause.orNull
-    )
-
 object PipelinesErrors extends Logging {
 
   /**
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/SqlGraphRegistrationContext.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/SqlGraphRegistrationContext.scala
index 829179142dc5c..4dfd096935781 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/SqlGraphRegistrationContext.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/SqlGraphRegistrationContext.scala
@@ -237,7 +237,7 @@ class SqlGraphRegistrationContext(
 
       // Register flow that backs this streaming table.
       graphRegistrationContext.registerFlow(
-        UnresolvedFlow(
+        UntypedFlow(
           identifier = stIdentifier,
           destinationIdentifier = stIdentifier,
           func = FlowAnalysis.createFlowFunctionFromLogicalPlan(cst.query),
@@ -288,7 +288,7 @@ class SqlGraphRegistrationContext(
 
       // Register flow that backs this materialized view.
       graphRegistrationContext.registerFlow(
-        UnresolvedFlow(
+        UntypedFlow(
           identifier = mvIdentifier,
           destinationIdentifier = mvIdentifier,
           func = FlowAnalysis.createFlowFunctionFromLogicalPlan(cmv.query),
@@ -331,7 +331,7 @@ class SqlGraphRegistrationContext(
 
       // Register flow that backs this persisted view.
       graphRegistrationContext.registerFlow(
-        UnresolvedFlow(
+        UntypedFlow(
           identifier = viewIdentifier,
           destinationIdentifier = viewIdentifier,
           func = FlowAnalysis.createFlowFunctionFromLogicalPlan(cv.query),
@@ -375,7 +375,7 @@ class SqlGraphRegistrationContext(
 
       // Register flow definition that backs this temporary view.
       graphRegistrationContext.registerFlow(
-        UnresolvedFlow(
+        UntypedFlow(
           identifier = viewIdentifier,
           destinationIdentifier = viewIdentifier,
           func = FlowAnalysis.createFlowFunctionFromLogicalPlan(cvc.plan),
@@ -451,7 +451,7 @@ class SqlGraphRegistrationContext(
         .identifier
 
       graphRegistrationContext.registerFlow(
-        UnresolvedFlow(
+        UntypedFlow(
           identifier = flowIdentifier,
           destinationIdentifier = qualifiedDestinationIdentifier,
           func = FlowAnalysis.createFlowFunctionFromLogicalPlan(flowQueryLogicalPlan),
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/State.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/State.scala
index efe5849d1cbd8..90c9030cf75e3 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/State.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/State.scala
@@ -25,11 +25,13 @@ import org.apache.spark.sql.AnalysisException
 object State extends Logging {
 
   /**
-   * Find the graph elements to reset given the current update context.
+   * Find the flows to reset given the current update context.
    * @param graph The graph to reset.
    * @param env The current update context.
    */
-  private def findElementsToReset(graph: DataflowGraph, env: PipelineUpdateContext): Seq[Input] = {
+  private def findFlowsToReset(
+      graph: DataflowGraph,
+      env: PipelineUpdateContext): Seq[ResolvedFlow] = {
     // If tableFilter is an instance of SomeTables, this is a refresh selection and all tables
     // to reset should be resettable; Otherwise, this is a full graph update, and we reset all
     // tables that are resettable.
@@ -62,25 +64,17 @@ object State extends Logging {
       }
     }
 
-    specifiedTablesToReset.flatMap(t => t +: graph.resolvedFlowsTo(t.identifier)) ++
+    specifiedTablesToReset.flatMap(t => graph.resolvedFlowsTo(t.identifier)) ++
     specifiedSinksToReset.flatMap(s => graph.resolvedFlowsTo(s.identifier))
   }
 
   /**
-   * Performs the following on targets selected for full refresh:
-   * - Clearing checkpoint data
-   * - Truncating table data
+   * Rolls the streaming checkpoint directory of every flow selected for full refresh. Table
+   * truncation is handled in [[DatasetManager.materializeTables]] since the Hive metastore does
+   * not support removing all columns from a table.
    */
-  def reset(resolvedGraph: DataflowGraph, env: PipelineUpdateContext): Seq[Input] = {
-    val elementsToReset: Seq[Input] = findElementsToReset(resolvedGraph, env)
-
-    elementsToReset.foreach {
-      case f: ResolvedFlow => reset(f, env, resolvedGraph)
-      case _ => // tables is handled in materializeTables since hive metastore does not support
-                // removing all columns from a table.
-    }
-
-    elementsToReset
+  def reset(resolvedGraph: DataflowGraph, env: PipelineUpdateContext): Unit = {
+    findFlowsToReset(resolvedGraph, env).foreach(reset(_, env, resolvedGraph))
   }
 
   /**
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/elements.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/elements.scala
index ce3a63de6a333..885755fd78ece 100644
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/elements.scala
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/elements.scala
@@ -19,8 +19,6 @@ package org.apache.spark.sql.pipelines.graph
 
 import java.util
 
-import scala.util.control.NonFatal
-
 import org.apache.spark.SparkException
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.Row
@@ -29,12 +27,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.classic.{DataFrame, SparkSession}
 import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
 import org.apache.spark.sql.pipelines.common.DatasetType
-import org.apache.spark.sql.pipelines.util.{
-  BatchReadOptions,
-  InputReadOptions,
-  SchemaInferenceUtils,
-  StreamingReadOptions
-}
+import org.apache.spark.sql.pipelines.util.SchemaInferenceUtils
 import org.apache.spark.sql.types.StructType
 
 /** An element in a [[DataflowGraph]]. */
@@ -68,10 +61,10 @@ trait Input extends GraphElement {
 
   /**
    * Returns a DataFrame that is a result of loading data from this [[Input]].
-   * @param readOptions Type of input. Used to determine streaming/batch
+   * @param asStreaming whether to try loading this input as a streaming or batch input.
    * @return Streaming or batch DataFrame of this Input's data.
    */
-  def load(readOptions: InputReadOptions): DataFrame
+  def load(asStreaming: Boolean): DataFrame
 }
 
 /**
@@ -101,8 +94,8 @@ sealed trait Dataset extends Output {
   def path: String
 }
 
-/** A type of [[Input]] where data is loaded from a table. */
-sealed trait TableInput extends Input {
+/** A graph element backed by a table: either a concrete [[Table]] or a [[VirtualTableInput]]. */
+sealed trait TableElement extends GraphElement {
 
   /** The user-specified schema for this table. */
   def specifiedSchema: Option[StructType]
@@ -132,29 +125,9 @@ case class Table(
     override val origin: QueryOrigin,
     isStreamingTable: Boolean,
     format: Option[String]
-) extends TableInput
+) extends TableElement
     with Dataset {
 
-  // Load this table's data from underlying storage.
-  override def load(readOptions: InputReadOptions): DataFrame = {
-    try {
-      lazy val tableName = identifier.quotedString
-
-      val df = readOptions match {
-        case sro: StreamingReadOptions =>
-          spark.readStream.options(sro.userOptions).table(tableName)
-        case _: BatchReadOptions =>
-          spark.read.table(tableName)
-        case _ =>
-          throw new IllegalArgumentException("Unhandled `InputReadOptions` type when loading table")
-      }
-
-      df
-    } catch {
-      case NonFatal(e) => throw LoadTableException(displayName, Option(e))
-    }
-  }
-
   /** Returns the normalized storage location to this [[Table]]. */
   override def path: String = {
     if (!normalized) {
@@ -176,42 +149,55 @@ case class Table(
 }
 
 /**
- * A type of [[TableInput]] that returns data from a specified schema or from the inferred
- * [[Flow]]s that write to the table.
+ * A virtual table is a representation of a pipeline table used during analysis. During analysis we
+ * only care about the schemas of declared tables, and its possible the declared tables do not yet
+ * exist in the catalog. Hence we represent all tables in the graph with their "virtual"
+ * counterparts, which are simply empty dataframes but with the same schemas.
+ *
+ * We refer to the declared table that the virtual counterpart represents as the "parent" table
+ * below.
+ *
+ * @param identifier  The identifier of the parent table.
+ * @param specifiedSchema The user-specified schema for the parent table.
+ * @param incomingFlowIdentifiers The identifiers of all flows that write to the parent table.
+ * @param availableFlows  All resolved flows that write to the parent table.
  */
 case class VirtualTableInput(
     identifier: TableIdentifier,
     specifiedSchema: Option[StructType],
     incomingFlowIdentifiers: Set[TableIdentifier],
     availableFlows: Seq[ResolvedFlow] = Nil
-) extends TableInput
+) extends TableElement with Input
     with Logging {
   override def origin: QueryOrigin = QueryOrigin()
 
   assert(availableFlows.forall(_.destinationIdentifier == identifier))
-  override def load(readOptions: InputReadOptions): DataFrame = {
-    // Infer the schema for this virtual table
-    def getFinalSchema: StructType = {
-      specifiedSchema match {
-        // This is not a backing table, and we have a user-specified schema, so use it directly.
-        case Some(ss) => ss
-        // Otherwise infer the schema from a combination of the incoming flows and the
-        // user-specified schema, if provided.
-        case _ =>
-          SchemaInferenceUtils.inferSchemaFromFlows(availableFlows, specifiedSchema)
-      }
-    }
 
-    // create empty streaming/batch df based on input type.
-    def createEmptyDF(schema: StructType): DataFrame = readOptions match {
-      case _: StreamingReadOptions =>
-        MemoryStream[Row](ExpressionEncoder(schema, lenient = false), spark)
-          .toDF()
-      case _ => spark.createDataFrame(new util.ArrayList[Row](), schema)
+  /**
+   * Loads this virtual table as a dataframe
+   *
+   * @param asStreaming whether to load as a streaming DF or batch DF. There are cases where we may
+   *                    want to batch read from a streaming table, for example.
+   */
+  def load(asStreaming: Boolean): DataFrame = {
+    val deducedSchema = specifiedSchema match {
+      // If the user specified a schema, use it directly.
+      case Some(ss) => ss
+      // Otherwise infer the schema from a combination of the incoming flows and the
+      // user-specified schema, if provided.
+      case _ =>
+        SchemaInferenceUtils.inferSchemaFromFlows(availableFlows, specifiedSchema)
     }
 
-    val df = createEmptyDF(getFinalSchema)
-    df
+    // Produce either a streaming or batch dataframe, depending on whether this is a virtual
+    // representation of a streaming or non-streaming table. Return the [empty] dataframe with the
+    // deduced schema.
+    if (asStreaming) {
+      MemoryStream[Row](ExpressionEncoder(deducedSchema, lenient = false), spark)
+        .toDF()
+    } else {
+      spark.createDataFrame(new util.ArrayList[Row](), deducedSchema)
+    }
   }
 }
 
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/util/InputReadInfo.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/util/InputReadInfo.scala
deleted file mode 100644
index 070927aea295f..0000000000000
--- a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/util/InputReadInfo.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.pipelines.util
-
-import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
-import org.apache.spark.sql.pipelines.util.StreamingReadOptions.EmptyUserOptions
-
-/**
- * Generic options for a read of an input.
- */
-sealed trait InputReadOptions
-
-/**
- * Options for a batch read of an input.
- */
-final case class BatchReadOptions() extends InputReadOptions
-
-/**
- * Options for a streaming read of an input.
- *
- * @param userOptions Holds the user defined read options.
- * @param droppedUserOptions Holds the options that were specified by the user but
- *                       not actually used. This is a bug but we are preserving this behavior
- *                       for now to avoid making a backwards incompatible change.
- */
-final case class StreamingReadOptions(
-    userOptions: CaseInsensitiveMap[String] = EmptyUserOptions,
-    droppedUserOptions: CaseInsensitiveMap[String] = EmptyUserOptions
-) extends InputReadOptions
-
-object StreamingReadOptions {
-  val EmptyUserOptions: CaseInsensitiveMap[String] = CaseInsensitiveMap(Map())
-}
diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/util/PipelinesCatalogUtils.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/util/PipelinesCatalogUtils.scala
new file mode 100644
index 0000000000000..8df9f128a25db
--- /dev/null
+++ b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/util/PipelinesCatalogUtils.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.util
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}
+import org.apache.spark.sql.errors.QueryCompilationErrors
+
+/** Catalog-resolution helpers shared across the pipelines module. */
+object PipelinesCatalogUtils {
+
+  /**
+   * Resolve a v1 [[TableIdentifier]] to a `(TableCatalog, Identifier)` pair usable against the
+   * v2 connector APIs. If `ident.catalog` is unset, falls back to the session's
+   * `currentCatalog`.
+   */
+  def resolveTableCatalog(
+      spark: SparkSession,
+      ident: TableIdentifier): (TableCatalog, Identifier) = {
+    val catalogManager = spark.sessionState.catalogManager
+    val catalogPlugin = ident.catalog
+      .map(catalogManager.catalog)
+      .getOrElse(catalogManager.currentCatalog)
+    val catalog = catalogPlugin match {
+      case t: TableCatalog => t
+      case _ => throw QueryCompilationErrors.missingCatalogTablesAbilityError(catalogPlugin)
+    }
+    val namespace = ident.database.getOrElse(
+      throw SparkException.internalError(
+        s"Cannot resolve table identifier ${ident.quotedString}: namespace is unspecified."
+      )
+    )
+    (catalog, Identifier.of(Array(namespace), ident.table))
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/analysis/ReadOptionsPropagationOnAnalysisSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/analysis/ReadOptionsPropagationOnAnalysisSuite.scala
new file mode 100644
index 0000000000000..763a6f500fdd8
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/analysis/ReadOptionsPropagationOnAnalysisSuite.scala
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.analysis
+
+import scala.collection.concurrent.TrieMap
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
+import org.apache.spark.sql.classic.SparkSession
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
+import org.apache.spark.sql.execution.streaming.runtime.StreamingRelation
+import org.apache.spark.sql.pipelines.graph.{FlowFunction, FlowFunctionResult, Input, QueryContext, QueryOrigin}
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+/**
+ * Tracker for flow function results.
+ *
+ * SDP analyzes flows in parallel (see [[DataflowGraphTransformer.transformDownNodes]]), so the
+ * backing map must tolerate concurrent writes from worker threads. A [[TrieMap]] gives lock-free
+ * per-key atomicity while remaining a [[scala.collection.mutable.Map]] for read sites.
+ *
+ * @param flowFunctionResults Concurrent map storing the latest FlowFunctionResult per flow
+ *                            function.
+ */
+case class FlowFunctionResultTracker(
+    flowFunctionResults: scala.collection.concurrent.Map[String, FlowFunctionResult] =
+      TrieMap.empty
+)
+
+/**
+ * Instrumented FlowFunction implementation, used to track flow function results.
+ * @param flowName  The name of the flow function being tracked
+ * @param flowFunction  The flow function being tracked
+ * @param flowFunctionResultTracker The flow function results tracker instance
+ */
+class InstrumentedFlowFunction(
+    flowName: String,
+    flowFunction: FlowFunction,
+    flowFunctionResultTracker: FlowFunctionResultTracker
+)
+  extends FlowFunction {
+  override def call(
+     allInputs: Set[TableIdentifier],
+     availableInputs: Seq[Input],
+     configuration: Map[String, String],
+     queryContext: QueryContext,
+     queryOrigin: QueryOrigin
+  ): FlowFunctionResult = {
+    val flowFunctionResult = flowFunction.call(
+      allInputs,
+      availableInputs,
+      configuration,
+      queryContext,
+      queryOrigin
+    )
+    flowFunctionResultTracker.flowFunctionResults.put(flowName, flowFunctionResult)
+    flowFunctionResult
+  }
+}
+
+class InstrumentedTestGraphRegistrationContext(
+    spark: SparkSession,
+    flowFunctionResultTracker: FlowFunctionResultTracker
+)
+  extends TestGraphRegistrationContext(spark) {
+
+  def readFlowFunc(
+      flowNameForTracking: String,
+      tableName: String,
+      extraOptions: CaseInsensitiveStringMap
+  ): FlowFunction =
+    new InstrumentedFlowFunction(
+      flowName = flowNameForTracking,
+      flowFunction = readFlowFunc(tableName, extraOptions),
+      flowFunctionResultTracker = flowFunctionResultTracker
+    )
+
+  def readStreamFlowFunc(
+      flowNameForTracking: String,
+      tableName: String,
+      extraOptions: CaseInsensitiveStringMap
+  ): FlowFunction =
+    new InstrumentedFlowFunction(
+      flowName = flowNameForTracking,
+      flowFunction = readStreamFlowFunc(tableName, extraOptions),
+      flowFunctionResultTracker = flowFunctionResultTracker
+    )
+}
+
+/**
+ * Test suite for verifying propagation of read options during pipelines analysis.
+ */
+class ReadOptionsPropagationOnAnalysisSuite extends ExecutionTest with SharedSparkSession {
+  test("internal pipeline batch read options are propagated during flow function analysis") {
+    val session = spark
+    import session.implicits._
+
+    val flowFunctionResultTracker = FlowFunctionResultTracker()
+
+    withTable("spark_catalog.test_db.a", "spark_catalog.test_db.b") {
+      val graphRegistrationContext =
+        new InstrumentedTestGraphRegistrationContext(spark, flowFunctionResultTracker) {
+          registerMaterializedView(name = "a", query = dfFlowFunc(Seq(1, 2).toDF("id")))
+          registerMaterializedView(
+            name = "b",
+            query = readFlowFunc(
+              flowNameForTracking = "bFlow",
+              tableName = "a",
+              extraOptions = new CaseInsensitiveStringMap(Map("x" -> "y").asJava)
+            )
+          )
+        }
+      val unresolvedGraph = graphRegistrationContext.toDataflowGraph
+
+      val updateContext = TestPipelineUpdateContext(spark, unresolvedGraph, storageRoot)
+      updateContext.pipelineExecution.runPipeline()
+      updateContext.pipelineExecution.awaitCompletion()
+
+      val bFlow = flowFunctionResultTracker.flowFunctionResults.get("bFlow").get
+
+      // Verify the flow function's analyzed DF logical plan contains specified options.
+      assert(bFlow.dataFrame.get.logicalPlan
+        .asInstanceOf[SubqueryAlias].child
+        .asInstanceOf[LogicalRelation].relation
+        .asInstanceOf[HadoopFsRelation].options.get("x").contains("y"))
+    }
+  }
+
+  test("internal pipeline stream read options are propagated during flow function analysis") {
+    val flowFunctionResultTracker = FlowFunctionResultTracker()
+
+    withTable("spark_catalog.default.a", "spark_catalog.test_db.b", "spark_catalog.test_db.c") {
+      // Create a regular external table that ST "b" can stream from, then have ST "c" stream from
+      // "b".
+      spark.range(10).write.saveAsTable("spark_catalog.default.a")
+
+      val graphRegistrationContext =
+        new InstrumentedTestGraphRegistrationContext(spark, flowFunctionResultTracker) {
+          registerTable(
+            name = "b",
+            query = Option(
+                readStreamFlowFunc(
+                  name = "spark_catalog.default.a"
+                )
+              )
+            )
+          registerTable(
+            name = "c",
+            query = Option(
+              readStreamFlowFunc(
+                flowNameForTracking = "cFlow",
+                tableName = "b",
+                extraOptions = new CaseInsensitiveStringMap(Map("x" -> "y").asJava)
+              )
+            )
+          )
+        }
+      val unresolvedGraph = graphRegistrationContext.toDataflowGraph
+
+      val updateContext = TestPipelineUpdateContext(spark, unresolvedGraph, storageRoot)
+      updateContext.pipelineExecution.runPipeline()
+      updateContext.pipelineExecution.awaitCompletion()
+
+      val cFlow = flowFunctionResultTracker.flowFunctionResults.get("cFlow").get
+
+      // Verify the flow function's analyzed DF logical plan contains specified options.
+      assert(cFlow.dataFrame.get.logicalPlan
+        .asInstanceOf[SubqueryAlias].child
+        .asInstanceOf[StreamingRelation].dataSource.options.get("x").contains("y"))
+    }
+  }
+
+  test("external pipeline batch read options are propagated during flow function analysis") {
+    val flowFunctionResultTracker = FlowFunctionResultTracker()
+
+    withTable("spark_catalog.default.a", "spark_catalog.test_db.b") {
+      // Create regular external table to batch read from with options.
+      spark.range(10).write.saveAsTable("spark_catalog.default.a")
+
+      val graphRegistrationContext =
+        new InstrumentedTestGraphRegistrationContext(spark, flowFunctionResultTracker) {
+          registerMaterializedView(
+            name = "b",
+            query = readFlowFunc(
+              flowNameForTracking = "bFlow",
+              tableName = "spark_catalog.default.a",
+              extraOptions = new CaseInsensitiveStringMap(Map("x" -> "y").asJava)
+            )
+          )
+        }
+      val unresolvedGraph = graphRegistrationContext.toDataflowGraph
+
+      val updateContext = TestPipelineUpdateContext(spark, unresolvedGraph, storageRoot)
+      updateContext.pipelineExecution.runPipeline()
+      updateContext.pipelineExecution.awaitCompletion()
+
+      val bFlow = flowFunctionResultTracker.flowFunctionResults.get("bFlow").get
+
+      // Verify the flow function's analyzed DF logical plan contains specified options.
+      assert(bFlow.dataFrame.get.logicalPlan
+        .asInstanceOf[SubqueryAlias].child
+        .asInstanceOf[LogicalRelation].relation
+        .asInstanceOf[HadoopFsRelation].options.get("x").contains("y"))
+    }
+  }
+
+  test("external pipeline stream read options are propagated during flow function analysis") {
+    val flowFunctionResultTracker = FlowFunctionResultTracker()
+
+    withTable("spark_catalog.default.a", "spark_catalog.test_db.b") {
+      // Create regular external table to stream from with read options.
+      spark.range(10).write.saveAsTable("spark_catalog.default.a")
+
+      val graphRegistrationContext =
+        new InstrumentedTestGraphRegistrationContext(spark, flowFunctionResultTracker) {
+          registerTable(
+            name = "b",
+            query = Option(
+              readStreamFlowFunc(
+                flowNameForTracking = "bFlow",
+                tableName = "spark_catalog.default.a",
+                extraOptions = new CaseInsensitiveStringMap(Map("x" -> "y").asJava)
+              )
+            )
+          )
+        }
+      val unresolvedGraph = graphRegistrationContext.toDataflowGraph
+
+      val updateContext = TestPipelineUpdateContext(spark, unresolvedGraph, storageRoot)
+      updateContext.pipelineExecution.runPipeline()
+      updateContext.pipelineExecution.awaitCompletion()
+
+      val bFlow = flowFunctionResultTracker.flowFunctionResults.get("bFlow").get
+
+      // Verify the flow function's analyzed DF logical plan contains specified options.
+      assert(bFlow.dataFrame.get.logicalPlan
+        .asInstanceOf[SubqueryAlias].child
+        .asInstanceOf[StreamingRelation].dataSource.options.get("x").contains("y"))
+    }
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcCatalogExecutionTestBase.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcCatalogExecutionTestBase.scala
new file mode 100644
index 0000000000000..0dc0a90276600
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcCatalogExecutionTestBase.scala
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.scalatest.BeforeAndAfter
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.classic.DataFrame
+import org.apache.spark.sql.connector.catalog.{Identifier, TableInfo}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+import org.apache.spark.sql.connector.catalog.InMemoryRowLevelOperationTableCatalog
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{DataType, LongType, StructType}
+
+/**
+ * Shared test infrastructure for AutoCDC suites that exercise execution paths performing
+ * v2 `MERGE INTO` operations against an in-memory catalog. Provides:
+ *
+ *   - A pre-configured [[InMemoryRowLevelOperationTableCatalog]] registered before each test
+ *     and reset after each test.
+ *   - Stable v2 [[Identifier]] and Catalyst [[TableIdentifier]] values for an auxiliary table
+ *     and a target table.
+ *   - Schema-agnostic primitives: table creation, microbatch [[DataFrame]] construction,
+ *     and CDC metadata helpers parameterized by sequencing type.
+ *
+ * Suites that mix this in are responsible for defining their own schemas (auxiliary, target,
+ * source) and processor / exec instances, then writing thin wrappers around [[createTable]]
+ * to seed those schemas.
+ */
+trait AutoCdcCatalogExecutionTestBase {
+  this: SharedSparkSession with BeforeAndAfter =>
+
+  protected val catalogName: String = "cat"
+  protected val namespace: String = "ns1"
+  protected val auxTableName: String = "aux_table"
+  protected val targetTableName: String = "target_table"
+
+  /** Default DSv2 [[Identifier]] for the auxiliary table. */
+  protected val defaultAuxIdent: Identifier = Identifier.of(Array(namespace), auxTableName)
+  /** Default DSv2 [[Identifier]] for the target table. */
+  protected val defaultTargetIdent: Identifier = Identifier.of(Array(namespace), targetTableName)
+
+  /** Default catalyst three-part [[TableIdentifier]] for the auxiliary table. */
+  protected val defaultAuxTableIdentifier: TableIdentifier = TableIdentifier(
+    table = auxTableName,
+    database = Some(namespace),
+    catalog = Some(catalogName)
+  )
+  /** Default catalyst three-part [[TableIdentifier]] for the target table. */
+  protected val defaultTargetTableIdentifier: TableIdentifier = TableIdentifier(
+    table = targetTableName,
+    database = Some(namespace),
+    catalog = Some(catalogName)
+  )
+
+  before {
+    spark.conf.set(
+      s"spark.sql.catalog.$catalogName",
+      classOf[InMemoryRowLevelOperationTableCatalog].getName
+    )
+  }
+
+  after {
+    spark.sessionState.catalogManager.reset()
+    spark.sessionState.conf.unsetConf(s"spark.sql.catalog.$catalogName")
+  }
+
+  /**
+   * Schema of the [[Scd1BatchProcessor.cdcMetadataColName]] struct column for a given
+   * sequencing column type. Defaults to [[LongType]] because all current SCD1 tests use
+   * `Long` sequencing.
+   */
+  protected def cdcMetadataColSchemaType(sequencingType: DataType = LongType): StructType =
+    new StructType()
+      .add(Scd1BatchProcessor.cdcDeleteSequenceFieldName, sequencingType)
+      .add(Scd1BatchProcessor.cdcUpsertSequenceFieldName, sequencingType)
+
+  /**
+   * Build a [[Row]] matching the [[Scd1BatchProcessor.cdcMetadataColName]] struct's two fields,
+   * in the order produced by [[Scd1BatchProcessor.constructCdcMetadataCol]]:
+   */
+  protected def cdcMetadataRow[T](deleteSeq: Option[T], upsertSeq: Option[T]): Row =
+    Row(deleteSeq.getOrElse(null), upsertSeq.getOrElse(null))
+
+  /**
+   * Create a table in the test catalog under the given DSv2 [[Identifier]] using `schema`,
+   * optionally seeding it with `seedRows`. Pass no rows to create an empty table.
+   */
+  protected def createTable(
+      ident: Identifier,
+      tableIdentifier: TableIdentifier,
+      schema: StructType,
+      seedRows: Row*): Unit = {
+    spark.sessionState.catalogManager
+      .catalog(catalogName)
+      .asTableCatalog
+      .createTable(ident, new TableInfo.Builder().withSchema(schema).build())
+
+    if (seedRows.nonEmpty) {
+      microbatchOf(schema)(seedRows: _*).writeTo(tableIdentifier.quotedString).append()
+    }
+  }
+
+  /** Build a microbatch [[DataFrame]] from explicit `rows` and an explicit `schema`. */
+  protected def microbatchOf(schema: StructType)(rows: Row*): DataFrame =
+    spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcFlowSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcFlowSuite.scala
new file mode 100644
index 0000000000000..cf7c9533bee98
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/AutoCdcFlowSuite.scala
@@ -0,0 +1,568 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import java.util.Locale
+
+import scala.util.Success
+
+import org.apache.spark.sql.{functions => F, AnalysisException, Column, QueryTest, Row}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.classic.DataFrame
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.pipelines.graph.{
+  AutoCdcFlow,
+  AutoCdcMergeFlow,
+  FlowFunction,
+  FlowFunctionResult,
+  Input,
+  QueryContext,
+  QueryOrigin
+}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType, StructField, StructType}
+
+/**
+ * Unit tests for the [[AutoCdcFlow]] and [[AutoCdcMergeFlow]] that do not execute graph analysis
+ * or execution.
+ */
+class AutoCdcFlowSuite extends QueryTest with SharedSparkSession {
+
+  private val testIdentifier = TableIdentifier("cdc_target", Some("db"))
+
+  /** A no-op [[FlowFunction]] that throws if invoked; AutoCdcFlow tests should never call it. */
+  private val noOpFlowFunction: FlowFunction = new FlowFunction {
+    override def call(
+        allInputs: Set[TableIdentifier],
+        availableInputs: Seq[Input],
+        configuration: Map[String, String],
+        queryContext: QueryContext,
+        queryOrigin: QueryOrigin): FlowFunctionResult =
+      throw new UnsupportedOperationException(
+        "noOpFlowFunction.call should not be invoked from AutoCdcFlowSuite tests"
+      )
+  }
+
+  private val testQueryContext =
+    QueryContext(currentCatalog = Some("test_catalog"), currentDatabase = Some("test_db"))
+
+  private val testChangeArgs = ChangeArgs(
+    keys = Seq(UnqualifiedColumnName("id")),
+    sequencing = F.col("seq"),
+    storedAsScdType = ScdType.Type1
+  )
+
+  private def newAutoCdcFlow(
+      identifier: TableIdentifier = testIdentifier,
+      destinationIdentifier: TableIdentifier = testIdentifier,
+      func: FlowFunction = noOpFlowFunction,
+      queryContext: QueryContext = testQueryContext,
+      sqlConf: Map[String, String] = Map.empty,
+      origin: QueryOrigin = QueryOrigin.empty,
+      changeArgs: ChangeArgs = testChangeArgs): AutoCdcFlow = {
+    AutoCdcFlow(
+      identifier = identifier,
+      destinationIdentifier = destinationIdentifier,
+      func = func,
+      queryContext = queryContext,
+      sqlConf = sqlConf,
+      origin = origin,
+      changeArgs = changeArgs
+    )
+  }
+
+  test("AutoCdcFlow exposes its constructor fields") {
+    val flow = newAutoCdcFlow(
+      sqlConf = Map("spark.sql.shuffle.partitions" -> "8")
+    )
+
+    assert(flow.identifier == testIdentifier)
+    assert(flow.destinationIdentifier == testIdentifier)
+    assert(flow.func eq noOpFlowFunction)
+    assert(flow.queryContext == testQueryContext)
+    assert(flow.sqlConf == Map("spark.sql.shuffle.partitions" -> "8"))
+    assert(flow.origin == QueryOrigin.empty)
+    assert(flow.changeArgs == testChangeArgs)
+  }
+
+  test("AutoCdcFlow defaults sqlConf to empty") {
+    // Confirms the case-class default values match the documented contract; downstream
+    // registration code relies on `sqlConf` being a non-null empty map by default so that
+    // `defaultSqlConf ++ flowDef.sqlConf` is well-defined in [[GraphRegistrationContext]].
+    val flow = AutoCdcFlow(
+      identifier = testIdentifier,
+      destinationIdentifier = testIdentifier,
+      func = noOpFlowFunction,
+      queryContext = testQueryContext,
+      origin = QueryOrigin.empty,
+      changeArgs = testChangeArgs
+    )
+
+    assert(flow.sqlConf.isEmpty)
+  }
+
+  test("AutoCdcFlow.once is always false") {
+    // AutoCDC flows are streaming-only and must run on every batch trigger, never as a
+    // one-shot full-refresh-style flow. Locking this in so a future refactor doesn't
+    // accidentally make `once` configurable.
+
+    // In the future we may intentionally add [[once]] support for AutoCDC flows, at which point
+    // this test can safely be removed.
+    val flow = newAutoCdcFlow()
+    assert(!flow.once)
+  }
+
+  test("AutoCdcFlow.withSqlConf returns a new instance with the updated sqlConf") {
+    val original = newAutoCdcFlow(sqlConf = Map("a" -> "1"))
+    val updated = original.withSqlConf(Map("b" -> "2"))
+
+    assert(updated.sqlConf == Map("b" -> "2"))
+    // All other fields should be preserved verbatim.
+    assert(updated.identifier == original.identifier)
+    assert(updated.destinationIdentifier == original.destinationIdentifier)
+    assert(updated.func eq original.func)
+    assert(updated.queryContext == original.queryContext)
+    assert(updated.origin == original.origin)
+    assert(updated.changeArgs == original.changeArgs)
+    // The original must not be mutated.
+    assert(original.sqlConf == Map("a" -> "1"))
+  }
+
+  // ===========================================================================================
+  // AutoCdcMergeFlow.schema tests
+  // ===========================================================================================
+
+  /** Materializes a successful [[FlowFunctionResult]] backed by the given source dataframe. */
+  private def successfulFuncResult(sourceDf: DataFrame): FlowFunctionResult =
+    FlowFunctionResult(
+      requestedInputs = Set.empty,
+      batchInputs = Set.empty,
+      streamingInputs = Set.empty,
+      usedExternalInputs = Set.empty,
+      dataFrame = Success(sourceDf),
+      sqlConf = Map.empty
+    )
+
+  /** Builds an [[AutoCdcMergeFlow]] over the given source dataframe + change args. */
+  private def newAutoCdcMergeFlow(
+      sourceDf: DataFrame,
+      keys: Seq[UnqualifiedColumnName] = Seq(UnqualifiedColumnName("id")),
+      sequencing: Column = F.col("seq"),
+      storedAsScdType: ScdType = ScdType.Type1,
+      columnSelection: Option[ColumnSelection] = None): AutoCdcMergeFlow = {
+    val flow = newAutoCdcFlow(
+      changeArgs = ChangeArgs(
+        keys = keys,
+        sequencing = sequencing,
+        storedAsScdType = storedAsScdType,
+        columnSelection = columnSelection
+      )
+    )
+    new AutoCdcMergeFlow(flow, successfulFuncResult(sourceDf))
+  }
+
+  /** A stable 3-column source streaming dataframe used across most schema tests. */
+  private def threeColumnSourceDf(): DataFrame = {
+    val session = spark
+    import session.implicits._
+    MemoryStream[(Int, String, Option[Long])].toDS().toDF("id", "name", "seq")
+  }
+
+  /** Convenience to extract the [[StructType]] of the projected `_cdc_metadata` column. */
+  private def cdcMetadataStruct(schema: StructType): StructType =
+    schema(Scd1BatchProcessor.cdcMetadataColName).dataType.asInstanceOf[StructType]
+
+  test(
+    "AutoCdcMergeFlow.schema appends _cdc_metadata to the source schema when no " +
+    "columnSelection is set"
+  ) {
+    val resolvedFlow = newAutoCdcMergeFlow(threeColumnSourceDf())
+
+    val expected = new StructType()
+      .add("id", IntegerType, nullable = false)
+      .add("name", StringType)
+      .add("seq", LongType)
+      .add(
+        StructField(
+          Scd1BatchProcessor.cdcMetadataColName,
+          Scd1BatchProcessor.cdcMetadataColSchema(LongType),
+          nullable = false
+        )
+      )
+    assert(resolvedFlow.schema == expected)
+  }
+
+  test("AutoCdcMergeFlow.schema applies an IncludeColumns selection") {
+    val resolvedFlow = newAutoCdcMergeFlow(
+      sourceDf = threeColumnSourceDf(),
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("id"), UnqualifiedColumnName("seq"))
+        )
+      )
+    )
+
+    val expected = new StructType()
+      .add("id", IntegerType, nullable = false)
+      .add("seq", LongType)
+      .add(
+        StructField(
+          Scd1BatchProcessor.cdcMetadataColName,
+          Scd1BatchProcessor.cdcMetadataColSchema(LongType),
+          nullable = false
+        )
+      )
+    assert(resolvedFlow.schema == expected)
+  }
+
+  test("AutoCdcMergeFlow.schema applies an ExcludeColumns selection") {
+    val resolvedFlow = newAutoCdcMergeFlow(
+      sourceDf = threeColumnSourceDf(),
+      columnSelection = Some(
+        ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName("name")))
+      )
+    )
+
+    val expected = new StructType()
+      .add("id", IntegerType, nullable = false)
+      .add("seq", LongType)
+      .add(
+        StructField(
+          Scd1BatchProcessor.cdcMetadataColName,
+          Scd1BatchProcessor.cdcMetadataColSchema(LongType),
+          nullable = false
+        )
+      )
+    assert(resolvedFlow.schema == expected)
+  }
+
+  test(
+    "AutoCdcMergeFlow.schema's _cdc_metadata struct uses the resolved sequencing data type"
+  ) {
+    // Source has a Long `seq` column; sequencing is `cast(seq as int)`, so the projected
+    // `_cdc_metadata` fields should be Int (not Long), demonstrating that the sequencing
+    // expression's *resolved* type drives the metadata schema.
+    val resolvedFlow = newAutoCdcMergeFlow(
+      sourceDf = threeColumnSourceDf(),
+      sequencing = F.col("seq").cast(IntegerType)
+    )
+
+    val metaStruct = cdcMetadataStruct(resolvedFlow.schema)
+    assert(metaStruct == Scd1BatchProcessor.cdcMetadataColSchema(IntegerType))
+  }
+
+  test("AutoCdcMergeFlow.schema's _cdc_metadata field is non-null with nullable inner fields") {
+    val resolvedFlow = newAutoCdcMergeFlow(threeColumnSourceDf())
+
+    val metaField = resolvedFlow.schema(Scd1BatchProcessor.cdcMetadataColName)
+    assert(!metaField.nullable, "_cdc_metadata column itself must be non-null")
+
+    val metaStruct = metaField.dataType.asInstanceOf[StructType]
+    assert(metaStruct(Scd1BatchProcessor.cdcDeleteSequenceFieldName).nullable)
+    assert(metaStruct(Scd1BatchProcessor.cdcUpsertSequenceFieldName).nullable)
+  }
+
+  test("AutoCdcMergeFlow.schema is stable across reads") {
+    // The schema computation calls `df.select(sequencing).schema`, which triggers Spark
+    // analysis. The eagerly-initialized `val` caches the result so downstream consumers get
+    // a stable schema instance across reads.
+    val resolvedFlow = newAutoCdcMergeFlow(threeColumnSourceDf())
+    val first = resolvedFlow.schema
+    val second = resolvedFlow.schema
+    assert(first eq second, "schema should be cached as a val and return the same instance")
+  }
+
+  test("AutoCdcMergeFlow rejects SCD2 at construction with AUTOCDC_SCD2_NOT_SUPPORTED") {
+    // Constructing the flow forces the resolved schema, which is unsupported for SCD2 today.
+    // Failing eagerly (rather than deferring to the first downstream `schema` read) is the
+    // intended UX -- pipeline graph analysis should not be able to register an SCD2 AutoCDC
+    // flow at all.
+    checkError(
+      exception = intercept[AnalysisException] {
+        newAutoCdcMergeFlow(
+          sourceDf = threeColumnSourceDf(),
+          storedAsScdType = ScdType.Type2
+        )
+      },
+      condition = "AUTOCDC_SCD2_NOT_SUPPORTED",
+      sqlState = "0A000",
+      parameters = Map.empty
+    )
+  }
+
+  // ===========================================================================================
+  // AutoCdcMergeFlow.load() contract tests
+  // ===========================================================================================
+
+  test("AutoCdcMergeFlow.load() schema matches AutoCdcMergeFlow.schema") {
+    val resolvedFlow = newAutoCdcMergeFlow(threeColumnSourceDf())
+    val loadedDf = resolvedFlow.load(asStreaming = true)
+    assert(loadedDf.schema == resolvedFlow.schema)
+  }
+
+  test("AutoCdcMergeFlow.load() respects an IncludeColumns selection") {
+    val resolvedFlow = newAutoCdcMergeFlow(
+      sourceDf = threeColumnSourceDf(),
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("id"), UnqualifiedColumnName("seq"))
+        )
+      )
+    )
+    val loadedDf = resolvedFlow.load(asStreaming = true)
+    assert(loadedDf.schema == resolvedFlow.schema)
+    // The user-selected portion drops `name`; the trailing column is the SCD1 metadata.
+    assert(
+      loadedDf.schema.fieldNames.toSeq ==
+      Seq("id", "seq", Scd1BatchProcessor.cdcMetadataColName)
+    )
+  }
+
+  test("AutoCdcMergeFlow.load() respects an ExcludeColumns selection") {
+    val resolvedFlow = newAutoCdcMergeFlow(
+      sourceDf = threeColumnSourceDf(),
+      columnSelection = Some(
+        ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName("name")))
+      )
+    )
+    val loadedDf = resolvedFlow.load(asStreaming = true)
+    assert(loadedDf.schema == resolvedFlow.schema)
+    assert(
+      loadedDf.schema.fieldNames.toSeq ==
+      Seq("id", "seq", Scd1BatchProcessor.cdcMetadataColName)
+    )
+  }
+
+  // ===========================================================================================
+  // AutoCdcMergeFlow reserved-prefix validation tests
+  //
+  // The two "contract:" tests below lock in the high-level invariant that no reserved-prefix
+  // column name can be referenced anywhere -- not in the source change-data feed schema, and
+  // not in user-supplied [[ChangeArgs]] (keys or columnSelection). Together they ensure that
+  // (a) users cannot opt out of the reserved CDC metadata column by omitting it from the
+  // selected schema, and (b) users cannot opt in to (or out of) any other reserved-prefix
+  // name we may reserve in the future for an internal CDC concern.
+  //
+  // The remaining tests pin down case-sensitivity nuances of the source-schema validator.
+  // ===========================================================================================
+
+  /** Builds an empty source df with `id` + `seq` + the supplied extra columns. */
+  private def sourceDfWithExtraColumns(extraColumns: (String, DataType)*): DataFrame = {
+    val session = spark
+    import session.implicits._
+    val baseStream = MemoryStream[(Int, Option[Long])].toDS().toDF("id", "seq")
+    extraColumns.foldLeft(baseStream) { case (acc, (name, dt)) =>
+      acc.withColumn(name, F.lit(null).cast(dt))
+    }
+  }
+
+  test(
+    "Contract: a source df column with the reserved AutoCDC prefix is rejected at flow " +
+    "construction"
+  ) {
+    val conflictingName = s"${AutoCdcReservedNames.prefix}foo"
+    val sourceDf = sourceDfWithExtraColumns(conflictingName -> StringType)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        newAutoCdcMergeFlow(sourceDf)
+      },
+      condition = "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT",
+      sqlState = "42710",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+        "columnName" -> conflictingName,
+        "schemaName" -> "changeDataFeed",
+        "reservedColumnNamePrefix" -> AutoCdcReservedNames.prefix
+      )
+    )
+  }
+
+  test(
+    "Contract: ChangeArgs referencing a reserved-prefix column is rejected even when the " +
+    "source df is clean"
+  ) {
+    // The source df has no reserved-prefix columns, but referencing a reserved-prefix column
+    // from any ChangeArgs path still fails at construction with a different error. The
+    // reservation is on the name itself, not on its presence in the source feed.
+    val cleanSourceDf = threeColumnSourceDf()
+    val reservedName = s"${AutoCdcReservedNames.prefix}foo"
+
+    val keysEx = intercept[AnalysisException] {
+      newAutoCdcMergeFlow(
+        sourceDf = cleanSourceDf,
+        keys = Seq(UnqualifiedColumnName(reservedName))
+      )
+    }
+    assert(keysEx.getCondition == "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA")
+
+    val includeEx = intercept[AnalysisException] {
+      newAutoCdcMergeFlow(
+        sourceDf = cleanSourceDf,
+        columnSelection = Some(
+          ColumnSelection.IncludeColumns(
+            Seq(UnqualifiedColumnName("id"), UnqualifiedColumnName(reservedName))
+          )
+        )
+      )
+    }
+    assert(includeEx.getCondition == "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA")
+
+    val excludeEx = intercept[AnalysisException] {
+      newAutoCdcMergeFlow(
+        sourceDf = cleanSourceDf,
+        columnSelection = Some(
+          ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName(reservedName)))
+        )
+      )
+    }
+    assert(excludeEx.getCondition == "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA")
+  }
+
+  test(
+    "AutoCdcMergeFlow rejects a source df column whose name equals the reserved CDC " +
+    "metadata column"
+  ) {
+    // Locks in the previous engine-level guard at flow-construction time. Any future
+    // regression where a user-supplied CDC stream carries the reserved metadata column name
+    // should fail eagerly here.
+    val sourceDf = sourceDfWithExtraColumns(Scd1BatchProcessor.cdcMetadataColName -> StringType)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        newAutoCdcMergeFlow(sourceDf)
+      },
+      condition = "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT",
+      sqlState = "42710",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+        "columnName" -> Scd1BatchProcessor.cdcMetadataColName,
+        "schemaName" -> "changeDataFeed",
+        "reservedColumnNamePrefix" -> AutoCdcReservedNames.prefix
+      )
+    )
+  }
+
+  test(
+    "AutoCdcMergeFlow rejects an uppercase reserved-prefix column when caseSensitive=false"
+  ) {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val conflictingName =
+        s"${AutoCdcReservedNames.prefix}foo".toUpperCase(Locale.ROOT)
+      val sourceDf = sourceDfWithExtraColumns(conflictingName -> StringType)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          newAutoCdcMergeFlow(sourceDf)
+        },
+        condition = "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT",
+        sqlState = "42710",
+        parameters = Map(
+          "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+          "columnName" -> conflictingName,
+          "schemaName" -> "changeDataFeed",
+          "reservedColumnNamePrefix" -> AutoCdcReservedNames.prefix
+        )
+      )
+    }
+  }
+
+  test(
+    "AutoCdcMergeFlow allows an uppercase reserved-prefix column when caseSensitive=true"
+  ) {
+    // Under case-sensitive analysis, the uppercase variant is a distinct identifier and does
+    // not collide with the lowercase reserved namespace. Locks in that the validation respects
+    // `spark.sql.caseSensitive`, consistent with the schema-augmentation logic in this class.
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      val nonConflictingName =
+        s"${AutoCdcReservedNames.prefix}foo".toUpperCase(Locale.ROOT)
+      val sourceDf = sourceDfWithExtraColumns(nonConflictingName -> StringType)
+
+      // No exception expected: construction succeeds.
+      newAutoCdcMergeFlow(sourceDf)
+    }
+  }
+
+  // ===========================================================================================
+  // AutoCdcMergeFlow keys-presence validation tests (requireKeysPresentInSelectedSchema)
+  // ===========================================================================================
+
+  test("AutoCdcMergeFlow rejects a key that is not present in the source change-data feed") {
+    // No columnSelection: the post-selection schema equals the source schema. The key `id`
+    // is absent from the source df entirely, so the validator must surface a CDC-specific
+    // error rather than deferring to Spark's generic UNRESOLVED_COLUMN.
+    val schema = new StructType()
+      .add("name", StringType)
+      .add("seq", LongType)
+    val sourceDf =
+      spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        newAutoCdcMergeFlow(sourceDf)
+      },
+      condition = "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA",
+      sqlState = "22023",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+        "keyColumnName" -> "id"
+      )
+    )
+  }
+
+  test("AutoCdcMergeFlow rejects a key dropped by an IncludeColumns selection") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        newAutoCdcMergeFlow(
+          sourceDf = threeColumnSourceDf(),
+          columnSelection = Some(
+            ColumnSelection.IncludeColumns(
+              Seq(UnqualifiedColumnName("name"), UnqualifiedColumnName("seq"))
+            )
+          )
+        )
+      },
+      condition = "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA",
+      sqlState = "22023",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+        "keyColumnName" -> "id"
+      )
+    )
+  }
+
+  test("AutoCdcMergeFlow rejects a key dropped by an ExcludeColumns selection") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        newAutoCdcMergeFlow(
+          sourceDf = threeColumnSourceDf(),
+          columnSelection = Some(
+            ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName("id")))
+          )
+        )
+      },
+      condition = "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA",
+      sqlState = "22023",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+        "keyColumnName" -> "id"
+      )
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgsSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgsSuite.scala
new file mode 100644
index 0000000000000..7be111003762f
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgsSuite.scala
@@ -0,0 +1,389 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.{functions => F, AnalysisException, Row}
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
+
+class ChangeArgsSuite extends SparkFunSuite with SharedSparkSession {
+
+  private val sourceSchema = new StructType()
+    .add("id", IntegerType, nullable = false)
+    .add("Name", StringType)
+    .add("age", IntegerType)
+
+  test("ColumnSelection None leaves schema unchanged") {
+    assert(
+      ColumnSelection.applyToSchema(
+        schemaName = "test",
+        schema = sourceSchema,
+        columnSelection = None,
+        caseSensitive = true
+      ) == sourceSchema)
+  }
+
+  test("ColumnSelection IncludeColumns(Seq()) returns an empty schema") {
+    // An explicit empty include-list is semantically distinct from None: it means "select
+    // no columns" and produces an empty StructType, not the original schema.
+    assert(
+      ColumnSelection.applyToSchema(
+        schemaName = "test",
+        schema = sourceSchema,
+        columnSelection = Some(ColumnSelection.IncludeColumns(Seq.empty)),
+        caseSensitive = true
+      ) == new StructType())
+  }
+
+  test("ColumnSelection ExcludeColumns(Seq()) leaves schema unchanged") {
+    // An empty exclude-list is a no-op: nothing to remove, so the original schema is
+    // returned unchanged (same observable behavior as None for this case).
+    assert(
+      ColumnSelection.applyToSchema(
+        schemaName = "test",
+        schema = sourceSchema,
+        columnSelection = Some(ColumnSelection.ExcludeColumns(Seq.empty)),
+        caseSensitive = true
+      ) == sourceSchema)
+  }
+
+  test("ColumnSelection IncludeColumns filters by exact name in schema order") {
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = sourceSchema,
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("age"), UnqualifiedColumnName("Name"))
+        )
+      ),
+      caseSensitive = true
+    )
+
+    assert(filteredSchema == new StructType()
+      .add("Name", StringType)
+      .add("age", IntegerType))
+  }
+
+  test("ColumnSelection ExcludeColumns filters by exact name") {
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = sourceSchema,
+      columnSelection = Some(
+        ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName("id")))
+      ),
+      caseSensitive = true
+    )
+
+    assert(filteredSchema == new StructType()
+      .add("Name", StringType)
+      .add("age", IntegerType))
+  }
+
+  test("ColumnSelection IncludeColumns fails for columns not present in schema") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        ColumnSelection.applyToSchema(
+          schemaName = "test",
+          schema = sourceSchema,
+          // Under caseSensitive = true, "name" will not match the schema field "Name".
+          columnSelection = Some(
+            ColumnSelection.IncludeColumns(
+              Seq(UnqualifiedColumnName("name"), UnqualifiedColumnName("missing"))
+            )
+          ),
+          caseSensitive = true
+        )
+      },
+      condition = "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA",
+      sqlState = "42703",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseSensitive,
+        "schemaName" -> "test",
+        "missingColumns" -> "name, missing",
+        "availableColumns" -> "id, Name, age"
+      )
+    )
+  }
+
+  test("ColumnSelection ExcludeColumns fails for columns not present in schema") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        ColumnSelection.applyToSchema(
+          schemaName = "test",
+          schema = sourceSchema,
+          // Under caseSensitive = true, "NAME" will not match the schema field "Name".
+          columnSelection = Some(
+            ColumnSelection.ExcludeColumns(
+              Seq(UnqualifiedColumnName("NAME"), UnqualifiedColumnName("missing"))
+            )
+          ),
+          caseSensitive = true
+        )
+      },
+      condition = "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA",
+      sqlState = "42703",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseSensitive,
+        "schemaName" -> "test",
+        "missingColumns" -> "NAME, missing",
+        "availableColumns" -> "id, Name, age"
+      )
+    )
+  }
+
+  test("ColumnSelection IncludeColumns matches case-insensitively under caseSensitive=false") {
+    // "NAME" and "AGE" do not exactly match the schema fields "Name" and "age", but
+    // caseSensitive = false folds both sides to lowercase before comparing.
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = sourceSchema,
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("AGE"), UnqualifiedColumnName("NAME"))
+        )
+      ),
+      caseSensitive = false
+    )
+
+    // The retained fields keep their original casing from the schema, not the user's input.
+    assert(filteredSchema == new StructType()
+      .add("Name", StringType)
+      .add("age", IntegerType))
+  }
+
+  test("ColumnSelection deduplicates user-provided columns that normalize to the same name") {
+    // Under caseSensitive = false, "name" and "NAME" both fold to "name" and refer to the same
+    // schema field. The returned schema must include "Name" once, not twice. Output ordering
+    // and casing follow the schema, not the user's input.
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = sourceSchema,
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("name"), UnqualifiedColumnName("NAME"))
+        )
+      ),
+      caseSensitive = false
+    )
+
+    assert(filteredSchema == new StructType().add("Name", StringType))
+  }
+
+  test("ColumnSelection ExcludeColumns matches case-insensitively under caseSensitive=false") {
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = sourceSchema,
+      columnSelection = Some(
+        ColumnSelection.ExcludeColumns(Seq(UnqualifiedColumnName("name")))
+      ),
+      caseSensitive = false
+    )
+
+    assert(filteredSchema == new StructType()
+      .add("id", IntegerType, nullable = false)
+      .add("age", IntegerType))
+  }
+
+  test("ColumnSelection missing-column error under caseSensitive=false preserves user casing") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        ColumnSelection.applyToSchema(
+          schemaName = "test",
+          schema = sourceSchema,
+          // "NAME" matches "Name" under caseSensitive=false, but "Missing" has no schema match.
+          // The error message reports the user's original casing for the missing column and
+          // the schema's original casing for the available columns.
+          columnSelection = Some(
+            ColumnSelection.IncludeColumns(
+              Seq(UnqualifiedColumnName("NAME"), UnqualifiedColumnName("Missing"))
+            )
+          ),
+          caseSensitive = false
+        )
+      },
+      condition = "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA",
+      sqlState = "42703",
+      parameters = Map(
+        "caseSensitivity" -> CaseSensitivityLabels.CaseInsensitive,
+        "schemaName" -> "test",
+        "missingColumns" -> "Missing",
+        "availableColumns" -> "id, Name, age"
+      )
+    )
+  }
+
+  test("UnqualifiedColumnName accepts a simple single-part identifier") {
+    assert(UnqualifiedColumnName("col").name == "col")
+    // .quoted always wraps in back-ticks, even when the input had none.
+    assert(UnqualifiedColumnName("col").quoted == "`col`")
+  }
+
+  test("UnqualifiedColumnName accepts a backtick-quoted name containing a literal dot") {
+    // Backticks make the dot part of a single name part, so this passes validation. The
+    // stored name is the parsed (unquoted) form so it matches the actual schema field name.
+    assert(UnqualifiedColumnName("`a.b`").name == "a.b")
+    // .quoted re-wraps the parsed name in back-ticks, round-tripping back to the input form.
+    assert(UnqualifiedColumnName("`a.b`").quoted == "`a.b`")
+  }
+
+  test("UnqualifiedColumnName accepts redundant backticks around a single-part name") {
+    // Backticks around an already-single-part identifier are decorative; the parser strips them
+    // so the stored name has no surrounding back-ticks.
+    assert(UnqualifiedColumnName("`col`").name == "col")
+    // .quoted re-wraps the parsed name in back-ticks, round-tripping back to the input form.
+    assert(UnqualifiedColumnName("`col`").quoted == "`col`")
+  }
+
+  test("UnqualifiedColumnName.quoted is safe to pass to functions.col for literal-dot names") {
+    val schema = new StructType()
+      .add("a.b", IntegerType)
+      .add("c", IntegerType)
+
+    val df = spark.createDataFrame(
+      spark.sparkContext.parallelize(Seq(Row(1, 2), Row(3, 4))),
+      schema
+    )
+
+    val key = UnqualifiedColumnName("`a.b`")
+
+    // Sanity-check: the unquoted `name` is not safe to pass to `functions.col`. The string is
+    // re-parsed and the literal dot is interpreted as a nested-field path separator, so the
+    // analyzer fails to resolve `a`.`b` against the available top-level columns.
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select(F.col(key.name)).collect()
+      },
+      condition = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+      sqlState = "42703",
+      parameters = Map(
+        "objectName" -> "`a`.`b`",
+        "proposal" -> "`a.b`, `c`"
+      ),
+      context = ExpectedContext(
+        fragment = "col",
+        callSitePattern = ""
+      )
+    )
+
+    // The `quoted` form wraps the name in back-ticks so the re-parser treats the whole thing
+    // as a single identifier, resolving to the top-level "a.b" column.
+    assert(df.select(F.col(key.quoted)).collect().toSeq == Seq(Row(1), Row(3)))
+  }
+
+  test("IncludeColumns correctly matches a backtick-quoted literal-dot column") {
+    val schema = new StructType()
+      .add("a.b", IntegerType)
+      .add("c", StringType)
+
+    // The user writes `a.b` to refer to the literal-dot column "a.b" in the schema. After
+    // construction, the [[UnqualifiedColumnName]] holds "a.b", which matches the field name
+    // exactly and the column is included in the filtered schema.
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = schema,
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(Seq(UnqualifiedColumnName("`a.b`")))
+      ),
+      caseSensitive = true
+    )
+
+    assert(filteredSchema == new StructType().add("a.b", IntegerType))
+  }
+
+  test("IncludeColumns correctly matches a backtick-quoted mixed-case column") {
+    val filteredSchema = ColumnSelection.applyToSchema(
+      schemaName = "test",
+      schema = sourceSchema,
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(Seq(UnqualifiedColumnName("`Name`")))
+      ),
+      caseSensitive = true
+    )
+
+    assert(filteredSchema == new StructType().add("Name", StringType))
+  }
+
+  test("UnqualifiedColumnName rejects a dotted (multi-part) identifier") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        UnqualifiedColumnName("a.b")
+      },
+      condition = "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER",
+      sqlState = "22023",
+      parameters = Map(
+        "columnName" -> "a.b",
+        "nameParts" -> "a, b"
+      )
+    )
+  }
+
+  test("UnqualifiedColumnName rejects a qualified column reference") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        UnqualifiedColumnName("src.x")
+      },
+      condition = "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER",
+      sqlState = "22023",
+      parameters = Map(
+        "columnName" -> "src.x",
+        "nameParts" -> "src, x"
+      )
+    )
+  }
+
+  test("UnqualifiedColumnName rejects an identifier with three or more parts") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        UnqualifiedColumnName("a.b.c")
+      },
+      condition = "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER",
+      sqlState = "22023",
+      parameters = Map(
+        "columnName" -> "a.b.c",
+        "nameParts" -> "a, b, c"
+      )
+    )
+  }
+
+  test("ChangeArgs rejects an empty key list") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        ChangeArgs(
+          keys = Seq.empty,
+          sequencing = F.col("seq"),
+          storedAsScdType = ScdType.Type1
+        )
+      },
+      condition = "AUTOCDC_EMPTY_KEYS",
+      sqlState = "22023",
+      parameters = Map.empty
+    )
+  }
+
+  test("UnqualifiedColumnName lets a ParseException from the SQL parser propagate") {
+    checkError(
+      exception = intercept[ParseException] {
+        UnqualifiedColumnName("")
+      },
+      condition = "PARSE_EMPTY_STATEMENT",
+      sqlState = Some("42617")
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorMergeSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorMergeSuite.scala
new file mode 100644
index 0000000000000..475d25f5aa2cf
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorMergeSuite.scala
@@ -0,0 +1,541 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.scalatest.BeforeAndAfter
+
+import org.apache.spark.sql.{functions => F, AnalysisException, Row}
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.util.QuotingUtils
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+
+/**
+ * Tests for [[Scd1BatchProcessor]] methods that perform a `MERGE INTO` against a registered
+ * v2 table. These tests require a v2 catalog that supports row-level operations
+ * (set up by [[AutoCdcCatalogExecutionTestBase]]) and run actual writes through Catalyst's
+ * row-level-operations machinery, so they are kept separate from the pure-DataFrame-transform
+ * tests in [[Scd1BatchProcessorSuite]].
+ */
+class Scd1BatchProcessorMergeSuite
+    extends QueryTest
+    with SharedSparkSession
+    with BeforeAndAfter
+    with AutoCdcCatalogExecutionTestBase {
+
+  /**
+   * Minimal valid shape for both the auxiliary table and microbatch inputs in these tests:
+   * a single key column `id` plus the CDC metadata struct. The auxiliary table genuinely
+   * has only this shape in production, and the merge function reduces its microbatch input
+   * down to keys + `_cdc_metadata` regardless of incoming data columns -- so most tests can
+   * use this single schema for both ends.
+   */
+  private val minimalSchema: StructType = new StructType()
+    .add("id", IntegerType)
+    .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+  /** Minimal target-table shape: one key, one data column, and CDC metadata. */
+  private val targetSchema: StructType = new StructType()
+    .add("id", IntegerType)
+    .add("value", StringType)
+    .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+  /**
+   * A processor with a single key column `id`. `sequencing` is irrelevant for
+   * merge functions in this suite: they operate entirely on the already-computed CDC metadata
+   * column, never on the raw sequencing expression.
+   */
+  private val processor = Scd1BatchProcessor(
+    changeArgs = ChangeArgs(
+      keys = Seq(UnqualifiedColumnName("id")),
+      sequencing = F.lit(0L),
+      storedAsScdType = ScdType.Type1
+    ),
+    resolvedSequencingType = LongType
+  )
+
+  /** Create the auxiliary table using [[minimalSchema]], optionally seeded with `seedRows`. */
+  private def createAuxTable(seedRows: Row*): Unit =
+    createTable(defaultAuxIdent, defaultAuxTableIdentifier, minimalSchema, seedRows: _*)
+
+  /** Create the target table using [[targetSchema]], optionally seeded with `seedRows`. */
+  private def createTargetTable(seedRows: Row*): Unit =
+    createTable(defaultTargetIdent, defaultTargetTableIdentifier, targetSchema, seedRows: _*)
+
+  /**
+   * Build an auxiliary-table schema with the given key columns followed by the standard CDC
+   * metadata struct. Used by tests that need a non-trivial key shape (composite or dotted).
+   */
+  private def customKeyAuxSchema(keyColumns: Seq[(String, DataType)]): StructType = {
+    val withKeys = keyColumns.foldLeft(new StructType()) { case (s, (name, dt)) =>
+      s.add(name, dt)
+    }
+    withKeys.add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+  }
+
+  /**
+   * Create the auxiliary table at [[defaultAuxIdent]] using `schema` and optionally seed it
+   * with `seedRows`. Used by tests that need a non-trivial key shape (composite or dotted).
+   */
+  private def createAuxTableWithSchema(schema: StructType, seedRows: Row*): Unit =
+    createTable(defaultAuxIdent, defaultAuxTableIdentifier, schema, seedRows: _*)
+
+  /**
+   * `(name, dataType)` pairs of `schema`'s fields, used to compare two schemas for structural
+   * equivalence while deliberately ignoring nullability and metadata.
+   */
+  private def columnNamesAndDataTypes(schema: StructType): Seq[(String, DataType)] =
+    schema.fields.map(f => (f.name, f.dataType)).toSeq
+
+  // =============== mergeMicrobatchOntoAuxiliaryTable tests ===============
+
+  test("mergeMicrobatchOntoAuxiliaryTable replaces an existing tombstone with a newer " +
+    "microbatch tombstone, dropping any microbatch-only data columns") {
+    createAuxTable(Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)))
+
+    // The microbatch carries an extra `value` data column that has no place in the auxiliary
+    // table. mergeMicrobatchOntoAuxiliaryTable must project it away before merging, both to
+    // satisfy MergeIntoTable's schema requirements and to keep the auxiliary table free of
+    // unrelated columns.
+    val microbatchSchema = new StructType()
+      .add("id", IntegerType)
+      .add("value", StringType)
+      .add(
+        Scd1BatchProcessor.cdcMetadataColName,
+        new StructType()
+          .add(Scd1BatchProcessor.cdcDeleteSequenceFieldName, LongType)
+          .add(Scd1BatchProcessor.cdcUpsertSequenceFieldName, LongType)
+      )
+    val microbatch = microbatchOf(microbatchSchema)(
+      Row(1, "data-leak", cdcMetadataRow(deleteSeq = Some(20L), upsertSeq = None))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    // Tombstone advanced to delete=20, with exactly one row per key (no duplicate tombstones).
+    checkAnswer(resultAuxTable, Row(1, Row(20L, null)))
+    // Schema strictly matches minimalSchema; the `value` column was dropped, not smuggled in.
+    assert(columnNamesAndDataTypes(resultAuxTable.schema) == columnNamesAndDataTypes(minimalSchema))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable deletes an existing tombstone when superseded by a " +
+    "newer microbatch upsert") {
+    createAuxTable(Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)))
+
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(1, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    assert(resultAuxTable.collect().isEmpty)
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable inserts a new tombstone for a previously-untracked " +
+    "key") {
+    createAuxTable()
+
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Row(1, Row(10L, null)))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable leaves rows for unrelated keys untouched") {
+    createAuxTable(Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)))
+
+    // Microbatch event affects a different key entirely; the existing tombstone for id=1 must
+    // not be touched even though the new tombstone's sequence is much larger.
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(2, cdcMetadataRow(deleteSeq = Some(100L), upsertSeq = None))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Seq(
+      Row(1, Row(10L, null)),
+      Row(2, Row(100L, null))
+    ))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable ignores microbatch deletes whose sequence is older " +
+    "than the existing tombstone") {
+    // This documents that mergeMicrobatchOntoAuxiliaryTable's contract is stronger than just
+    // relying on applyTombstonesToMicrobatch having filtered out stale events upstream: even
+    // an unfiltered stale incoming delete must not regress the high-water mark.
+    createAuxTable(Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)))
+
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(5L), upsertSeq = None))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Row(1, Row(10L, null)))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable ignores microbatch upserts whose sequence is older " +
+    "than the existing tombstone") {
+    createAuxTable(Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)))
+
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(1, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5L)))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Row(1, Row(10L, null)))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable applies the tied-sequence asymmetry: equal deletes " +
+    "are kept, equal upserts delete the tombstone") {
+    // On a delete<->upsert sequencing tie, upsert events are given priority over deletes;
+    // therefore an incoming upsert with the same sequence as a tombstone should delete the
+    // tombstone. On a delete<->delete sequencing tie, the effect is a no-op. This is an
+    // internal SCD1 tie-breaking convention, not a publicly documented contract.
+    createAuxTable(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)),
+      Row(2, cdcMetadataRow(deleteSeq = Some(20L), upsertSeq = None))
+    )
+
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)),
+      Row(2, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    // Row 1's tombstone remains the same, but row 2's tombstone should be marked as stale and
+    // deleted.
+    checkAnswer(resultAuxTable, Row(1, Row(10L, null)))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable upsert event for different key does not affect " +
+    "tombstone") {
+    createAuxTable(Row(2, cdcMetadataRow(deleteSeq = Some(5L), upsertSeq = None)))
+
+    val microbatch = microbatchOf(minimalSchema)(
+      // Although the upsert seq is 10, this is for key=1; tombstone for key=2 should be unaffected.
+      Row(1, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L)))
+    )
+
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Row(2, Row(5L, null)))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable is idempotent across a microbatch that exercises " +
+    "every merge clause") {
+    // The auxiliary table starts with three tombstones; the microbatch then exercises every
+    // merge clause simultaneously:
+    //   - id=1: aux tombstone superseded by a microbatch upsert
+    //   - id=2: aux tombstone advanced by a newer microbatch delete
+    //   - id=3: untouched by the microbatch
+    //   - id=4: new tombstone for an untracked key
+    createAuxTable(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)),
+      Row(2, cdcMetadataRow(deleteSeq = Some(20L), upsertSeq = None)),
+      Row(3, cdcMetadataRow(deleteSeq = Some(30L), upsertSeq = None))
+    )
+
+    val microbatch = microbatchOf(minimalSchema)(
+      Row(1, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(15L))),
+      Row(2, cdcMetadataRow(deleteSeq = Some(25L), upsertSeq = None)),
+      Row(4, cdcMetadataRow(deleteSeq = Some(40L), upsertSeq = None))
+    )
+
+    val expectedAfterMerge = Seq(
+      Row(2, Row(25L, null)),
+      Row(3, Row(30L, null)),
+      Row(4, Row(40L, null))
+    )
+
+    // First merge applies all three clauses exactly once.
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+    val auxTableAfterFirstMerge = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(auxTableAfterFirstMerge, expectedAfterMerge)
+
+    // Re-applying the same microbatch is a no-op:
+    //   - id=1 is absent from aux; whenNotMatched is gated on delete events => skipped.
+    //   - id=2 has tied delete (incoming==aux); strict `>` in the update clause fails.
+    //   - id=4 has tied delete (incoming==aux); same reason.
+    processor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+    val auxTableAfterSecondMerge = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    checkAnswer(auxTableAfterSecondMerge, expectedAfterMerge)
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable correctly inserts tombstones for composite key") {
+    // Composite key: (region, customer_id). The merge join condition is the AND of every key
+    // column equality, so an aux row sharing only `region` with the microbatch must NOT be
+    // touched, while the microbatch row must be inserted as a new tombstone.
+    val compositeSchema = customKeyAuxSchema(Seq(
+      "region" -> StringType,
+      "customer_id" -> IntegerType
+    ))
+    createAuxTableWithSchema(
+      compositeSchema,
+      Row("US", 99, cdcMetadataRow(deleteSeq = Some(50L), upsertSeq = None))
+    )
+
+    val compositeKeyProcessor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("region"), UnqualifiedColumnName("customer_id")),
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val microbatch = microbatchOf(compositeSchema)(
+      Row("US", 1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None))
+    )
+
+    compositeKeyProcessor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    checkAnswer(spark.read.table(defaultAuxTableIdentifier.quotedString), Seq(
+      Row("US", 99, Row(50L, null)),
+      Row("US", 1, Row(10L, null))
+    ))
+  }
+
+  test("mergeMicrobatchOntoAuxiliaryTable correctly merges for backticked/dotted keys") {
+    // Even though the column is a backticked identifier in user-facing DDL, Spark drops the
+    // backticks during schema resolution so the field name is the literal `user.id`. The merge
+    // path must propagate the user's quoted identifier through `k.quoted` so the join condition
+    // and update target both resolve to the same physical column.
+    val dottedKeySchema = customKeyAuxSchema(Seq("user.id" -> IntegerType))
+    createAuxTableWithSchema(
+      dottedKeySchema,
+      Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None))
+    )
+
+    val dottedKeyProcessor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("`user.id`")),
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    // We expect the existing tombstone with del seq=10 to be advanced to 20 if the merge matches
+    // dotted keys correctly.
+    val microbatch = microbatchOf(dottedKeySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(20L), upsertSeq = None))
+    )
+
+    dottedKeyProcessor.mergeMicrobatchOntoAuxiliaryTable(microbatch, defaultAuxTableIdentifier)
+
+    checkAnswer(spark.read.table(defaultAuxTableIdentifier.quotedString), Row(1, Row(20L, null)))
+  }
+
+  // =============== mergeMicrobatchOntoTarget tests ===============
+
+  test("mergeMicrobatchOntoTarget updates an existing row with a newer upsert") {
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val microbatch = microbatchOf(targetSchema)(
+      Row(1, "new", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+    )
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "new", Row(null, 20L)))
+    assert(columnNamesAndDataTypes(resultTargetTable.schema) ==
+      columnNamesAndDataTypes(targetSchema))
+  }
+
+  test("mergeMicrobatchOntoTarget deletes an existing row with a newer delete") {
+    createTargetTable(
+      Row(1, "delete-me", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))),
+      Row(2, "keep-me", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+    )
+
+    val microbatch = microbatchOf(targetSchema)(
+      Row(1, "unused", cdcMetadataRow(deleteSeq = Some(15L), upsertSeq = None))
+    )
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(2, "keep-me", Row(null, 20L)))
+  }
+
+  test("mergeMicrobatchOntoTarget inserts new upserts but not new (tombstone) deletes") {
+    createTargetTable(Row(1, "existing", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val microbatch = microbatchOf(targetSchema)(
+      Row(2, "insert-me", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L))),
+      Row(3, "do-not-insert", cdcMetadataRow(deleteSeq = Some(30L), upsertSeq = None))
+    )
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Seq(
+      Row(1, "existing", Row(null, 10L)),
+      Row(2, "insert-me", Row(null, 20L))
+    ))
+  }
+
+  test("mergeMicrobatchOntoTarget ignores stale upserts and stale deletes") {
+    createTargetTable(
+      Row(1, "target-delete-tie", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))),
+      Row(2, "target-newer", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+    )
+
+    val microbatch = microbatchOf(targetSchema)(
+      Row(1, "delete-tie", cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)),
+      Row(2, "older-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(15L)))
+    )
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Seq(
+      Row(1, "target-delete-tie", Row(null, 10L)),
+      Row(2, "target-newer", Row(null, 20L))
+    ))
+  }
+
+  test("mergeMicrobatchOntoTarget gives tied upserts priority over the target row") {
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val microbatch = microbatchOf(targetSchema)(
+      Row(1, "same-sequence-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L)))
+    )
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "same-sequence-upsert", Row(null, 10L)))
+  }
+
+  test("mergeMicrobatchOntoTarget correctly matches escaped key column names") {
+    // The raw key name contains special characters that would require being escaped on name
+    // resolution.
+    val rawKeyName = "a`b"
+    val schemaWithSpecialKeyCharacters = new StructType()
+      // The schema always stores the backtick consumed column name, so unticked the raw name here.
+      .add(rawKeyName, IntegerType)
+      .add("value", StringType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+    createTable(
+      defaultTargetIdent,
+      defaultTargetTableIdentifier,
+      schemaWithSpecialKeyCharacters,
+      Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L)))
+    )
+
+    val processorForCustomKeySchema = processor.copy(
+      changeArgs = processor.changeArgs.copy(
+        keys = Seq(UnqualifiedColumnName(QuotingUtils.quoteIdentifier(rawKeyName)))
+      )
+    )
+    val microbatch = microbatchOf(schemaWithSpecialKeyCharacters)(
+      Row(1, "new", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+    )
+
+    processorForCustomKeySchema.mergeMicrobatchOntoTarget(
+      microbatch,
+      defaultTargetTableIdentifier
+    )
+
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "new", Row(null, 20L)))
+  }
+
+  gridTest(
+    "mergeMicrobatchOntoTarget key column comparison respects spark session case sensitivity"
+  )(Seq(false, true)) { caseSensitive =>
+    withSQLConf("spark.sql.caseSensitive" -> caseSensitive.toString) {
+      createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+      val processorWithUpperCaseKey = processor.copy(
+        changeArgs = processor.changeArgs.copy(
+          keys = Seq(UnqualifiedColumnName("ID"))
+        )
+      )
+
+      val microbatch = microbatchOf(targetSchema)(
+        Row(1, "new", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L)))
+      )
+
+      if (caseSensitive) {
+        val ex = intercept[AnalysisException] {
+          processorWithUpperCaseKey.mergeMicrobatchOntoTarget(
+            microbatch,
+            defaultTargetTableIdentifier
+          )
+        }
+        // Intentionally not using checkError here, to avoid asserting on a brittle query context
+        // and long message parmeters list.
+        assert(ex.errorClass.contains("UNRESOLVED_COLUMN.WITH_SUGGESTION"))
+      } else {
+        processorWithUpperCaseKey.mergeMicrobatchOntoTarget(
+          microbatch,
+          defaultTargetTableIdentifier
+        )
+        val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+        checkAnswer(resultTargetTable, Row(1, "new", Row(null, 20L)))
+      }
+    }
+  }
+
+  test("mergeMicrobatchOntoTarget is idempotent across a microbatch") {
+    createTargetTable(
+      Row(1, "delete-me", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))),
+      Row(2, "update-me", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(20L))),
+      Row(3, "untouched", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(30L)))
+    )
+
+    val microbatch = microbatchOf(targetSchema)(
+      Row(1, "delete-event", cdcMetadataRow(deleteSeq = Some(15L), upsertSeq = None)),
+      Row(2, "updated", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(25L))),
+      Row(4, "inserted", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(40L))),
+      Row(5, "absent-delete", cdcMetadataRow(deleteSeq = Some(50L), upsertSeq = None))
+    )
+
+    val expectedAfterMerge = Seq(
+      Row(2, "updated", Row(null, 25L)),
+      Row(3, "untouched", Row(null, 30L)),
+      Row(4, "inserted", Row(null, 40L))
+    )
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+    val targetTableAfterFirstMerge = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(targetTableAfterFirstMerge, expectedAfterMerge)
+
+    processor.mergeMicrobatchOntoTarget(microbatch, defaultTargetTableIdentifier)
+    val targetTableAfterSecondMerge = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(targetTableAfterSecondMerge, expectedAfterMerge)
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorSuite.scala
new file mode 100644
index 0000000000000..9432150c40167
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessorSuite.scala
@@ -0,0 +1,1199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.apache.spark.sql.{functions => F, AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.classic.DataFrame
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+
+class Scd1BatchProcessorSuite extends QueryTest with SharedSparkSession {
+
+  /**
+   * Test Schema for a microbatch that already has the SCD1 CDC metadata column projected.
+   */
+  private val microbatchWithCdcMetadataSchema: StructType = new StructType()
+    .add("id", IntegerType)
+    .add("name", StringType)
+    .add("age", IntegerType)
+    .add(
+      Scd1BatchProcessor.cdcMetadataColName,
+      new StructType()
+        .add(Scd1BatchProcessor.cdcDeleteSequenceFieldName, LongType)
+        .add(Scd1BatchProcessor.cdcUpsertSequenceFieldName, LongType)
+    )
+
+  /** DataType for the CDC metadata column, where sequencing type is Long. */
+  private val cdcMetadataColSchemaType: DataType = new StructType()
+    .add(Scd1BatchProcessor.cdcDeleteSequenceFieldName, LongType)
+    .add(Scd1BatchProcessor.cdcUpsertSequenceFieldName, LongType)
+
+  /**
+   * Helper to construct a CDC metadata column row, following [[cdcMetadataColSchemaType]].
+   */
+  private def cdcMetadataRow(deleteSeq: Option[Long], upsertSeq: Option[Long]): Row =
+    Row(deleteSeq.getOrElse(null), upsertSeq.getOrElse(null))
+
+
+  /** Build a microbatch [[DataFrame]] from explicit rows and an explicit schema. */
+  private def microbatchOf(schema: StructType)(rows: Row*): DataFrame =
+    spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)
+
+  /**
+   * Returns the `(name, dataType)` pairs of `schema`'s fields. Used to compare two schemas for
+   * structural equivalence while deliberately ignoring nullability and metadata, which can shift
+   * benignly when columns are unpacked from a struct.
+   */
+  private def columnNamesAndDataTypes(schema: StructType): Seq[(String, DataType)] =
+    schema.fields.map(f => (f.name, f.dataType)).toSeq
+
+  // =============== deduplicateMicrobatch tests ===============
+
+  test("deduplicateMicrobatch keeps only the row with the largest sequence value per key") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "first"),
+      Row(1, 30L, "winner"),
+      Row(1, 20L, "middle")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 30L, "winner")
+    )
+  }
+
+  test("deduplicateMicrobatch is no-op if there's a single event for a key") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "only-row")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 10L, "only-row")
+    )
+  }
+
+  test("deduplicateMicrobatch handles equal sequencing values for the same key") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "first-tied-row"),
+      Row(1, 10L, "second-tied-row")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    // On equal sequence number events for the same key we provide no guarantee on which event will
+    // survive, but the contract is _one_ event will survive - assert that below.
+    val result = processor.deduplicateMicrobatch(batch).collect()
+    assert(result.length == 1)
+    assert(result.head.getInt(0) == 1)
+    assert(result.head.getLong(1) == 10L)
+    assert(Set("first-tied-row", "second-tied-row").contains(result.head.getString(2)))
+  }
+
+  test("deduplicateMicrobatch ignores rows with null sequencing when a non-null value exists") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      // In production the expectation is the microbatch will have been validated to not contain
+      // any null sequence values, but demonstrate that null sequence rows are de-prioritized in
+      // deduplication.
+      Row(1, null, "null-sequence"),
+      Row(1, 10L, "non-null-sequence")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 10L, "non-null-sequence")
+    )
+  }
+
+  test(
+    "deduplicateMicrobatch returns a null row when all sequencing values for a key are null"
+  ) {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+    val batch = microbatchOf(schema)(
+      // In production the expectation is the microbatch will have been validated to not contain
+      // any null sequence values, but demonstrate that a null row will be returned by
+      // deduplication if all rows contain a null sequence in the microbatch.
+      Row(1, null, "null-sequence")
+    )
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(null, null, null)
+    )
+  }
+
+  test("deduplicateMicrobatch processes multiple keys independently") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "a1"),
+      Row(2, 50L, "b1-winner"),
+      Row(1, 20L, "a2-winner"),
+      Row(2, 40L, "b2-loser"),
+      Row(3, 1L, "c1-only")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Seq(
+        Row(1, 20L, "a2-winner"),
+        Row(2, 50L, "b1-winner"),
+        Row(3, 1L, "c1-only")
+      )
+    )
+  }
+
+  test("deduplicateMicrobatch carries non-key, non-sequence columns from the winning row") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("name", StringType)
+      .add("amount", IntegerType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "old-name", 100),
+      Row(1, 20L, "winning-name", 200)
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    // All non-key columns must come from the row with the largest sequence value, never
+    // a mix of values from multiple rows.
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 20L, "winning-name", 200)
+    )
+  }
+
+  test("deduplicateMicrobatch carries nested columns correctly from the winning row") {
+    val payloadType = new StructType()
+      .add("name", StringType)
+      .add("amount", IntegerType)
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("payload", payloadType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, Row("old", 100)),
+      Row(1, 20L, Row("new", 200))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 20L, Row("new", 200))
+    )
+  }
+
+  test("deduplicateMicrobatch supports composite (multi-column) keys") {
+    val schema = new StructType()
+      .add("region", StringType)
+      .add("customer_id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row("US", 1, 10L, "us1-old"),
+      Row("US", 1, 20L, "us1-new"),
+      // Same customer_id as above but different region: independent group.
+      Row("EU", 1, 5L, "eu1-only"),
+      // Same region as above but different customer_id: independent group.
+      Row("US", 2, 99L, "us2-only")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("region"), UnqualifiedColumnName("customer_id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Seq(
+        Row("US", 1, 20L, "us1-new"),
+        Row("EU", 1, 5L, "eu1-only"),
+        Row("US", 2, 99L, "us2-only")
+      )
+    )
+  }
+
+  test("deduplicateMicrobatch supports an arbitrary sequencing expression") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("alt_seq", LongType)
+      .add("value", StringType)
+
+    // The sequencing expression is a function call referencing multiple columns, not a bare
+    // identifier. Locks in that `max_by(..., changeArgs.sequencing)` evaluates the full
+    // expression per-row rather than treating `sequencing` as a single column reference.
+    val batch = microbatchOf(schema)(
+      // greatest(10, 30) = 30 - winner under the expression.
+      Row(1, 10L, 30L, "winner"),
+      // greatest(25, 20) = 25 - would win under `seq` alone, but loses under `greatest`.
+      Row(1, 25L, 20L, "would-win-on-seq-alone"),
+      Row(1, 15L, 15L, "always-loses")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.greatest(F.col("seq"), F.col("alt_seq")),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 10L, 30L, "winner")
+    )
+  }
+
+  test("deduplicateMicrobatch supports literal-dot column names") {
+    val schema = new StructType()
+      .add("user.id", IntegerType)
+      .add("seq", LongType)
+      .add("event.value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "old"),
+      Row(1, 20L, "new")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("`user.id`")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.deduplicateMicrobatch(batch),
+      expectedAnswer = Row(1, 20L, "new")
+    )
+  }
+
+  test(
+    "deduplicateMicrobatch fails when a key column collides with the reserved name"
+  ) {
+    val reservedColName = Scd1BatchProcessor.winningRowColName
+
+    val schema = new StructType()
+      .add(reservedColName, StringType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row("k1", 10L, "loser"),
+      Row("k1", 20L, "winner")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName(reservedColName)),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        processor.deduplicateMicrobatch(batch).collect()
+      },
+      condition = "AMBIGUOUS_REFERENCE",
+      sqlState = "42704",
+      parameters = Map(
+        "name" -> s"`$reservedColName`",
+        "referenceNames" -> s"[`$reservedColName`, `$reservedColName`]"
+      ),
+      context = ExpectedContext(fragment = "col", callSitePattern = "")
+    )
+  }
+
+  test("deduplicateMicrobatch preserves the input column names, types, and ordering") {
+    val schema = new StructType()
+      .add("a", StringType)
+      .add("id", IntegerType)
+      .add("z", DoubleType)
+      .add("seq", LongType)
+      .add("flag", BooleanType)
+
+    val batch = microbatchOf(schema)(
+      Row("a1", 1, 1.5, 10L, true),
+      Row("a2", 1, 2.5, 20L, false)
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    // Field names and dataTypes must match the input exactly, in the original order.
+    assert(
+      columnNamesAndDataTypes(processor.deduplicateMicrobatch(batch).schema) ==
+        columnNamesAndDataTypes(schema))
+  }
+
+  test("deduplicateMicrobatch returns an empty DataFrame with preserved schema") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)()
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.deduplicateMicrobatch(batch)
+    assert(result.collect().isEmpty)
+    assert(columnNamesAndDataTypes(result.schema) == columnNamesAndDataTypes(schema))
+  }
+
+  // =============== extendMicrobatchRowsWithCdcMetadata tests ===============
+
+  test("extendMicrobatchRowsWithCdcMetadata classifies each row as a delete or an upsert " +
+    "per deleteCondition") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("is_delete", BooleanType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, false),
+      Row(2, 20L, true),
+      Row(3, 30L, false),
+      Row(4, 40L, true)
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        deleteCondition = Some(F.col("is_delete") === true)
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    // Mutual-exclusivity invariant: each row's CDC metadata struct has exactly one of
+    // (deleteSequence, upsertSequence) non-null, and the non-null side carries the row's
+    // sequence value.
+    checkAnswer(
+      df = processor.extendMicrobatchRowsWithCdcMetadata(batch),
+      expectedAnswer = Seq(
+        Row(1, 10L, false, Row(null, 10L)),
+        Row(2, 20L, true, Row(20L, null)),
+        Row(3, 30L, false, Row(null, 30L)),
+        Row(4, 40L, true, Row(40L, null))
+      )
+    )
+  }
+
+  test("extendMicrobatchRowsWithCdcMetadata treats null deleteCondition results as upserts") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("is_delete", BooleanType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, null)
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        deleteCondition = Some(F.col("is_delete"))
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.extendMicrobatchRowsWithCdcMetadata(batch),
+      expectedAnswer = Row(1, 10L, null, Row(null, 10L))
+    )
+  }
+
+  test("extendMicrobatchRowsWithCdcMetadata treats every row as an upsert " +
+    "when deleteCondition is None") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "a"),
+      Row(2, 20L, "b")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        deleteCondition = None
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.extendMicrobatchRowsWithCdcMetadata(batch),
+      expectedAnswer = Seq(
+        Row(1, 10L, "a", Row(null, 10L)),
+        Row(2, 20L, "b", Row(null, 20L))
+      )
+    )
+  }
+
+  test("extendMicrobatchRowsWithCdcMetadata appends CDC metadata as the last column") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      .add("seq", LongType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10L, "a")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.extendMicrobatchRowsWithCdcMetadata(batch)
+
+    // Original columns are preserved in their original order, with CDC metadata appended at
+    // the very end.
+    assert(result.schema.fieldNames.toSeq ==
+      schema.fieldNames.toSeq :+ Scd1BatchProcessor.cdcMetadataColName)
+  }
+
+  test("extendMicrobatchRowsWithCdcMetadata casts delete / upsert sequence fields to " +
+    "resolvedSequencingType") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      // Microbatch's sequencing column is IntegerType, but the flow's resolved sequencing type
+      // will be LongType. This should be upcasted in the projected CDC metadata column.
+      .add("seq", IntegerType)
+      .add("value", StringType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, 10, "a")
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val resultDf = processor.extendMicrobatchRowsWithCdcMetadata(batch)
+
+    val cdcMetadataDataType =
+      resultDf.schema(Scd1BatchProcessor.cdcMetadataColName).dataType.asInstanceOf[StructType]
+    assert(columnNamesAndDataTypes(cdcMetadataDataType) == Seq(
+      Scd1BatchProcessor.cdcDeleteSequenceFieldName -> LongType,
+      Scd1BatchProcessor.cdcUpsertSequenceFieldName -> LongType))
+
+    // The cast must also succeed at runtime: upsertSequence is materialized as a Long value, not
+    // an Int.
+    checkAnswer(
+      df = resultDf,
+      expectedAnswer = Row(1, 10, "a", Row(null, 10L))
+    )
+  }
+
+  test("extendMicrobatchRowsWithCdcMetadata fails fast when the microbatch's sequencing column " +
+    "is incompatible with resolvedSequencingType") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      // Microbatch's sequencing column is a struct, whereas the flow's resolved sequencing type
+      // will be LongType. These are incompatible and should throw.
+      .add(
+        "seq",
+        new StructType()
+          .add("major", LongType)
+          .add("minor", LongType))
+
+    val batch = microbatchOf(schema)(
+      Row(1, Row(1L, 0L))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val ex = intercept[AnalysisException] {
+      // .schema forces analysis of the underlying logical plan, surfacing the invalid cast.
+      processor.extendMicrobatchRowsWithCdcMetadata(batch).schema
+    }
+    assert(ex.getCondition == "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION")
+  }
+
+  test("projectTargetColumnsOntoMicrobatch keeps every user column and the CDC metadata column " +
+    "when columnSelection is None") {
+    val batch = microbatchOf(microbatchWithCdcMetadataSchema)(
+      Row(1, "alice", 30, Row(null, 10L)),
+      Row(2, "bob", 25, Row(20L, null))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        columnSelection = None
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+    // None selection is no-op on the user columns, and the CDC metadata column is unconditionally
+    // re-projected last, so the output shape exactly matches the input.
+    assert(result.schema.fieldNames.toSeq == microbatchWithCdcMetadataSchema.fieldNames.toSeq)
+    checkAnswer(
+      df = result,
+      expectedAnswer = Seq(
+        Row(1, "alice", 30, Row(null, 10L)),
+        Row(2, "bob", 25, Row(20L, null))
+      )
+    )
+  }
+
+  test("projectTargetColumnsOntoMicrobatch retains the CDC metadata column even when " +
+    "IncludeColumns does not contain it") {
+    val batch = microbatchOf(microbatchWithCdcMetadataSchema)(
+      Row(1, "alice", 30, Row(null, 10L))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        columnSelection = Some(
+          ColumnSelection.IncludeColumns(
+            Seq(UnqualifiedColumnName("id"), UnqualifiedColumnName("age"))
+          )
+        )
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+    assert(result.schema.fieldNames.toSeq ==
+      Seq("id", "age", Scd1BatchProcessor.cdcMetadataColName))
+    checkAnswer(
+      df = result,
+      expectedAnswer = Row(1, 30, Row(null, 10L))
+    )
+  }
+
+  test("projectTargetColumnsOntoMicrobatch respects exclude column") {
+    val batch = microbatchOf(microbatchWithCdcMetadataSchema)(
+      Row(1, "alice", 30, Row(null, 10L))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        columnSelection = Some(
+          ColumnSelection.ExcludeColumns(
+            Seq(UnqualifiedColumnName("age"))
+          )
+        )
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+    assert(
+      result.schema.fieldNames.toSeq ==
+        Seq("id", "name", Scd1BatchProcessor.cdcMetadataColName)
+    )
+    checkAnswer(
+      df = result,
+      expectedAnswer = Row(1, "alice", Row(null, 10L))
+    )
+  }
+
+  test("projectTargetColumnsOntoMicrobatch preserves the microbatch schema order") {
+    val batch = microbatchOf(microbatchWithCdcMetadataSchema)(
+      Row(1, "alice", 30, Row(null, 10L))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        // User specifies (age, id) -- intentionally different from the schema order (id, age).
+        columnSelection = Some(ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("age"), UnqualifiedColumnName("id"))
+        ))
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+    // Output column order follows the original microbatch schema (id before age), not the order
+    // in which the user listed columns in IncludeColumns. The CDC metadata column is appended
+    // last as always.
+    assert(result.schema.fieldNames.toSeq ==
+      Seq("id", "age", Scd1BatchProcessor.cdcMetadataColName))
+
+    checkAnswer(
+      df = result,
+      expectedAnswer = Row(1, 30, Row(null, 10L))
+    )
+  }
+
+  test("projectTargetColumnsOntoMicrobatch handles backticked column names containing a " +
+    "literal dot") {
+    val schema = new StructType()
+      .add("id", IntegerType)
+      // Even if a column is created with backticks via DDL, those backticks are consumed by Spark
+      // before resolving the schema; they won't show up in the schema field.
+      .add("user.id", StringType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType)
+
+    val batch = microbatchOf(schema)(
+      Row(1, "u-100", Row(null, 10L))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        columnSelection = Some(
+          ColumnSelection.IncludeColumns(
+            Seq(
+              UnqualifiedColumnName("id"),
+              UnqualifiedColumnName("`user.id`")
+            )
+          )
+        )
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+    assert(result.schema.fieldNames.toSeq ==
+      Seq("id", "user.id", Scd1BatchProcessor.cdcMetadataColName))
+    checkAnswer(
+      df = result,
+      expectedAnswer = Row(1, "u-100", Row(null, 10L))
+    )
+  }
+
+  test("projectTargetColumnsOntoMicrobatch resolves columnSelection case-insensitively " +
+    "when SQLConf.CASE_SENSITIVE=false") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val batch = microbatchOf(microbatchWithCdcMetadataSchema)(
+        Row(1, "alice", 30, Row(null, 10L))
+      )
+
+      val processor = Scd1BatchProcessor(
+        changeArgs = ChangeArgs(
+          keys = Seq(UnqualifiedColumnName("id")),
+          sequencing = F.col("seq"),
+          storedAsScdType = ScdType.Type1,
+          // User columns intentionally use a different case than the schema (id, age).
+          columnSelection = Some(
+            ColumnSelection.IncludeColumns(
+              Seq(UnqualifiedColumnName("ID"), UnqualifiedColumnName("AGE"))
+            )
+          )
+        ),
+        resolvedSequencingType = LongType
+      )
+
+      val result = processor.projectTargetColumnsOntoMicrobatch(batch)
+
+      // Output column names follow the microbatch schema's casing, not the casing in the user's
+      // columnSelection. The CDC metadata column is appended last as always.
+      assert(result.schema.fieldNames.toSeq ==
+        Seq("id", "age", Scd1BatchProcessor.cdcMetadataColName))
+      checkAnswer(
+        df = result,
+        expectedAnswer = Row(1, 30, Row(null, 10L))
+      )
+    }
+  }
+
+  // =============== applyTombstonesToMicrobatch tests ===============
+
+  /**
+   * Schema for the microbatch input to [[Scd1BatchProcessor.applyTombstonesToMicrobatch]]
+   * tests.
+   */
+  private val applyTombstonesToMicrobatchTestMicrobatchSchema: StructType = new StructType()
+    // Key column.
+    .add("id", IntegerType)
+    // Data column.
+    .add("value", StringType)
+    // CDC metadata column.
+    .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType)
+
+  /**
+   * Schema for the auxiliary input to [[Scd1BatchProcessor.applyTombstonesToMicrobatch]] tests.
+   *
+   * In practice for SCD1 the auxiliary table only carries key columns and the CDC metadata
+   * column -- never user data columns -- so we mirror that production-side asymmetry here,
+   * even though the function's API contract would allow a single shared schema.
+   */
+  private val applyTombstonesToMicrobatchTestAuxiliarySchema: StructType = new StructType()
+    // Key column.
+    .add("id", IntegerType)
+    // CDC metadata column.
+    .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType)
+
+  test("applyTombstonesToMicrobatch drops late-arriving deletes and upserts when a matching " +
+    "tombstone exists for the same key") {
+    // Both microbatch events have an effective sequence strictly less than the tombstone's
+    // delete sequence, so they must be anti-joined out of the microbatch regardless of whether
+    // they are deletes or upserts.
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "stale-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5))),
+      Row(1, "stale-delete", cdcMetadataRow(deleteSeq = Some(7), upsertSeq = None))
+    )
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.applyTombstonesToMicrobatch(microbatch, auxiliary)
+    assert(result.collect().isEmpty)
+  }
+
+  test("applyTombstonesToMicrobatch keeps a microbatch row whose effective sequence ties the " +
+    "tombstone's delete sequence") {
+    // The join uses strict `<`, so a microbatch row with the same effective sequence as the
+    // tombstone is kept. This is an internal tie-breaking convention for SCD1 only, and is
+    // *not* a publicly documented contract: if external callers ever start relying on it, both
+    // this test and the join condition in applyTombstonesToMicrobatch should move together.
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "tied-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10)))
+    )
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.applyTombstonesToMicrobatch(microbatch, auxiliary),
+      expectedAnswer = Row(1, "tied-upsert", Row(null, 10L))
+    )
+  }
+
+  test("applyTombstonesToMicrobatch keeps microbatch rows whose effective sequence exceeds the " +
+    "tombstone's delete sequence") {
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "fresher-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(15))),
+      Row(1, "fresher-delete", cdcMetadataRow(deleteSeq = Some(20), upsertSeq = None))
+    )
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.applyTombstonesToMicrobatch(microbatch, auxiliary),
+      expectedAnswer = Seq(
+        Row(1, "fresher-upsert", Row(null, 15L)),
+        Row(1, "fresher-delete", Row(20L, null))
+      )
+    )
+  }
+
+  test("applyTombstonesToMicrobatch leaves microbatch rows untouched when the tombstone targets " +
+    "a different key") {
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "stays", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5)))
+    )
+    // Tombstone on a different key with a much larger sequence; the key match must guard
+    // against cross-key application no matter how stale the microbatch row's sequence is.
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(2, cdcMetadataRow(deleteSeq = Some(1000), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.applyTombstonesToMicrobatch(microbatch, auxiliary),
+      expectedAnswer = Row(1, "stays", Row(null, 5L))
+    )
+  }
+
+  test("applyTombstonesToMicrobatch with composite keys requires every key column to match") {
+    val schema = new StructType()
+      .add("region", StringType)
+      .add("customer_id", IntegerType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType)
+
+    val microbatch = microbatchOf(schema)(
+      Row("US", 1, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5))),
+      Row("US", 2, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5)))
+    )
+    // Tombstone matches on `region` only; `customer_id` differs from every microbatch row.
+    // The join condition is the AND of all key column equalities, so neither microbatch row
+    // should be dropped.
+    val auxiliary = microbatchOf(schema)(
+      Row("US", 99, cdcMetadataRow(deleteSeq = Some(1000), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("region"), UnqualifiedColumnName("customer_id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.applyTombstonesToMicrobatch(microbatch, auxiliary),
+      expectedAnswer = Seq(
+        Row("US", 1, Row(null, 5L)),
+        Row("US", 2, Row(null, 5L))
+      )
+    )
+  }
+
+  test("applyTombstonesToMicrobatch supports backticked key names containing a literal dot") {
+    val schema = new StructType()
+      .add("user.id", IntegerType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType)
+
+    val microbatch = microbatchOf(schema)(
+      Row(1, cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5)))
+    )
+    val auxiliary = microbatchOf(schema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("`user.id`")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.applyTombstonesToMicrobatch(microbatch, auxiliary)
+    assert(result.collect().isEmpty)
+  }
+
+  test("applyTombstonesToMicrobatch is a no-op when the auxiliary table is empty") {
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "kept-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5))),
+      Row(2, "kept-delete", cdcMetadataRow(deleteSeq = Some(7), upsertSeq = None))
+    )
+
+    // Empty auxiliary: no rows means the left-anti join cannot match any microbatch row, so the
+    // microbatch passes through untouched regardless of its contents.
+
+    // Conceptually, this means there are no tombstones that could potentially have delete-matched
+    // against incoming rows in the microbatch.
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)()
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.applyTombstonesToMicrobatch(microbatch, auxiliary),
+      expectedAnswer = Seq(
+        Row(1, "kept-upsert", Row(null, 5L)),
+        Row(2, "kept-delete", Row(7L, null))
+      )
+    )
+  }
+
+  test("applyTombstonesToMicrobatch keeps microbatch rows when the matching aux row has a " +
+    "null deleteSequence") {
+    // SCD1's tombstone-merge invariant guarantees aux rows always have a non-null
+    // deleteSequence, but if a corrupt aux row ever does carry a null deleteSequence, the
+    // join's `<` predicate evaluates to null (SQL 3-valued logic) and the microbatch row is
+    // retained -- a safe fallback that never silently drops data.
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "kept-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5)))
+    )
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = None, upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    checkAnswer(
+      df = processor.applyTombstonesToMicrobatch(microbatch, auxiliary),
+      expectedAnswer = Row(1, "kept-upsert", Row(null, 5L))
+    )
+  }
+
+  test("applyTombstonesToMicrobatch is unaffected by  stale tombstones in auxiliary table") {
+    // SCD1's tombstone-merge invariant guarantees at most one tombstone per key in the
+    // auxiliary, but if multiple ever coexist for the same key, the left-anti semantics drop
+    // the microbatch row whenever *any* matching tombstone has a strictly greater
+    // deleteSequence.
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "stale-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(8)))
+    )
+    // Two tombstones on key=1: one stale (deleteSeq=5, doesn't dominate the microbatch row's
+    // effective seq of 8), one fresh (deleteSeq=10, dominates). The fresh one alone is enough
+    // to drop the microbatch row.
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(5), upsertSeq = None)),
+      Row(1, cdcMetadataRow(deleteSeq = Some(10), upsertSeq = None))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.applyTombstonesToMicrobatch(microbatch, auxiliary)
+    assert(result.collect().isEmpty)
+  }
+
+  test("applyTombstonesToMicrobatch ignores the aux row's upsertSequence even when it is set") {
+    // SCD1's tombstone-merge invariant guarantees aux rows always have a null upsertSequence
+    // (by definition, an aux row is an unswallowed tombstone). But if a corrupt aux row ever
+    // has both fields set, only its deleteSequence is read by the join condition; the
+    // upsertSequence is never inspected, so the row continues to behave as a pure tombstone.
+    val microbatch = microbatchOf(applyTombstonesToMicrobatchTestMicrobatchSchema)(
+      Row(1, "stale-upsert", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5)))
+    )
+    // Aux row with both fields populated; only deleteSeq=10 drives the tombstone-drop decision.
+    val auxiliary = microbatchOf(applyTombstonesToMicrobatchTestAuxiliarySchema)(
+      Row(1, cdcMetadataRow(deleteSeq = Some(10), upsertSeq = Some(20)))
+    )
+
+    val processor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        // Sequencing is irrelevant for applyTombstonesToMicrobatch; it is already encoded
+        // into the CDC metadata column.
+        sequencing = F.lit(0L),
+        storedAsScdType = ScdType.Type1
+      ),
+      resolvedSequencingType = LongType
+    )
+
+    val result = processor.applyTombstonesToMicrobatch(microbatch, auxiliary)
+    assert(result.collect().isEmpty)
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1ForeachBatchHandlerSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1ForeachBatchHandlerSuite.scala
new file mode 100644
index 0000000000000..76790847ede5c
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/autocdc/Scd1ForeachBatchHandlerSuite.scala
@@ -0,0 +1,639 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.autocdc
+
+import org.scalatest.BeforeAndAfter
+
+import org.apache.spark.sql.{functions => F, AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+
+/**
+ * E2E unit tests for the Scd1ForeachBatchHandler class.
+ */
+class Scd1ForeachBatchHandlerSuite
+    extends QueryTest
+    with SharedSparkSession
+    with BeforeAndAfter
+    with AutoCdcCatalogExecutionTestBase {
+
+  private val sourceSchema = new StructType()
+    .add("id", IntegerType)
+    .add("value", StringType)
+    .add("seq", LongType)
+    .add("is_delete", BooleanType)
+
+  private val auxiliarySchema = new StructType()
+    .add("id", IntegerType)
+    .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+  private val targetSchema = new StructType()
+    .add("id", IntegerType)
+    .add("value", StringType)
+    .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+  private val processor = Scd1BatchProcessor(
+    changeArgs = ChangeArgs(
+      keys = Seq(UnqualifiedColumnName("id")),
+      sequencing = F.col("seq"),
+      storedAsScdType = ScdType.Type1,
+      deleteCondition = Some(F.col("is_delete")),
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("id"), UnqualifiedColumnName("value"))
+        )
+      )
+    ),
+    resolvedSequencingType = LongType
+  )
+
+  /** Create the auxiliary table using [[auxiliarySchema]], optionally seeded with `seedRows`. */
+  private def createAuxTable(seedRows: Row*): Unit =
+    createTable(defaultAuxIdent, defaultAuxTableIdentifier, auxiliarySchema, seedRows: _*)
+
+  /** Create the target table using [[targetSchema]], optionally seeded with `seedRows`. */
+  private def createTargetTable(seedRows: Row*): Unit =
+    createTable(defaultTargetIdent, defaultTargetTableIdentifier, targetSchema, seedRows: _*)
+
+  private def exec: Scd1ForeachBatchHandler = Scd1ForeachBatchHandler(
+    batchProcessor = processor,
+    auxiliaryTableIdentifier = defaultAuxTableIdentifier,
+    targetTableIdentifier = defaultTargetTableIdentifier
+  )
+
+  // ===========================================================================================
+  // Microbatch validation tests
+  // ===========================================================================================
+
+  test(
+    "Scd1ForeachBatchHandler invalidates rows with null sequencing before merging to aux/target " +
+    "tables."
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val batch = microbatchOf(sourceSchema)(
+      Row(1, "invalid", null, false)
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        exec.execute(batch, batchId = 123L)
+      },
+      condition = "AUTOCDC_MICROBATCH_VALIDATION.NULL_SEQUENCE",
+      sqlState = "22000",
+      parameters = Map(
+        "tableName" -> defaultTargetTableIdentifier.quotedString,
+        "batchId" -> "123",
+        "nullCount" -> "1"
+      )
+    )
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(resultAuxTable.collect().isEmpty)
+    checkAnswer(resultTargetTable, Row(1, "old", Row(null, 10L)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler invalidates rows with a null key column before merging to aux/target"
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    // Two rows have a null id; one row is well-formed. The validator must surface a count
+    // of two without writing anything to the aux or target table.
+    val batch = microbatchOf(sourceSchema)(
+      Row(null, "no-id-1", 5L, false),
+      Row(2, "ok", 6L, false),
+      Row(null, "no-id-2", 7L, true)
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        exec.execute(batch, batchId = 13L)
+      },
+      condition = "AUTOCDC_MICROBATCH_VALIDATION.NULL_KEY",
+      sqlState = "22000",
+      parameters = Map(
+        "tableName" -> defaultTargetTableIdentifier.quotedString,
+        "batchId" -> "13",
+        "nullKeyCounts" -> "`id`=2"
+      )
+    )
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(resultAuxTable.collect().isEmpty)
+    checkAnswer(resultTargetTable, Row(1, "old", Row(null, 10L)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler invalidates rows when any column of a composite key is null"
+  ) {
+    // Composite [country, city] key. The validator must report per-column null counts in
+    // the configured key order (country before city).
+    val compositeSourceSchema = new StructType()
+      .add("country", StringType)
+      .add("city", StringType)
+      .add("seq", LongType)
+      .add("is_delete", BooleanType)
+    val compositeAuxSchema = new StructType()
+      .add("country", StringType)
+      .add("city", StringType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+    val compositeTargetSchema = new StructType()
+      .add("country", StringType)
+      .add("city", StringType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+    val compositeProcessor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("country"), UnqualifiedColumnName("city")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        deleteCondition = Some(F.col("is_delete"))
+      ),
+      resolvedSequencingType = LongType
+    )
+    val compositeExec = Scd1ForeachBatchHandler(
+      batchProcessor = compositeProcessor,
+      auxiliaryTableIdentifier = defaultAuxTableIdentifier,
+      targetTableIdentifier = defaultTargetTableIdentifier
+    )
+
+    createTable(defaultAuxIdent, defaultAuxTableIdentifier, compositeAuxSchema)
+    createTable(defaultTargetIdent, defaultTargetTableIdentifier, compositeTargetSchema)
+
+    // country is null in 2 rows, city is null in 2 rows (one row has both null).
+    val batch = microbatchOf(compositeSourceSchema)(
+      Row(null, "Boston", 1L, false),
+      Row("US", null, 2L, false),
+      Row(null, null, 3L, false)
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        compositeExec.execute(batch, batchId = 7L)
+      },
+      condition = "AUTOCDC_MICROBATCH_VALIDATION.NULL_KEY",
+      sqlState = "22000",
+      parameters = Map(
+        "tableName" -> defaultTargetTableIdentifier.quotedString,
+        "batchId" -> "7",
+        "nullKeyCounts" -> "`country`=2, `city`=2"
+      )
+    )
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(resultAuxTable.collect().isEmpty)
+    assert(resultTargetTable.collect().isEmpty)
+  }
+
+  test(
+    "Scd1ForeachBatchHandler surfaces the null-sequence error before the null-key error"
+  ) {
+    // A single row has both a null sequence and a null id. The validator must surface the
+    // sequence error first to preserve the existing precedence.
+    createAuxTable()
+    createTargetTable()
+
+    val batch = microbatchOf(sourceSchema)(
+      Row(null, "bad", null, false)
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        exec.execute(batch, batchId = 99L)
+      },
+      condition = "AUTOCDC_MICROBATCH_VALIDATION.NULL_SEQUENCE",
+      sqlState = "22000",
+      parameters = Map(
+        "tableName" -> defaultTargetTableIdentifier.quotedString,
+        "batchId" -> "99",
+        "nullCount" -> "1"
+      )
+    )
+  }
+
+  test(
+    "Scd1ForeachBatchHandler validates that the microbatch's sequencing column is orderable"
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val batchSchema = new StructType()
+      .add("id", IntegerType)
+      .add("value", StringType)
+      .add("seq", MapType(StringType, IntegerType))
+      .add("is_delete", BooleanType)
+    val batch = microbatchOf(batchSchema)(
+      Row(1, "invalid", Map("k" -> 1), false)
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        exec.execute(batch, batchId = 124L)
+      },
+      condition = "AUTOCDC_MICROBATCH_VALIDATION.NON_ORDERABLE_SEQUENCE",
+      sqlState = "22000",
+      parameters = Map(
+        "tableName" -> defaultTargetTableIdentifier.quotedString,
+        "batchId" -> "124",
+        "dataType" -> "map<string,int>"
+      )
+    )
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(resultAuxTable.collect().isEmpty)
+    checkAnswer(resultTargetTable, Row(1, "old", Row(null, 10L)))
+  }
+
+  // ===========================================================================================
+  // Core SCD1 transformation tests
+  // ===========================================================================================
+
+  test(
+    "Scd1ForeachBatchHandler drops stale microbatch rows using auxiliary tombstones and writes " +
+    "fresh upserts"
+  ) {
+    createAuxTable(Row(1, cdcMetadataRow(deleteSeq = Some(10L), upsertSeq = None)))
+    createTargetTable()
+
+    val batch = microbatchOf(sourceSchema)(
+      Row(1, "stale", 5L, false),
+      Row(2, "fresh", 20L, false)
+    )
+
+    exec.execute(batch, batchId = 0L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Row(1, Row(10L, null)))
+    checkAnswer(resultTargetTable, Row(2, "fresh", Row(null, 20L)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler persists a newer delete as a tombstone and removes the target row"
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val batch = microbatchOf(sourceSchema)(
+      Row(1, "unused", 20L, true)
+    )
+
+    exec.execute(batch, batchId = 1L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultAuxTable, Row(1, Row(20L, null)))
+    assert(resultTargetTable.collect().isEmpty)
+  }
+
+  test(
+    "Scd1ForeachBatchHandler deduplicates the raw microbatch before merging into the target"
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "old", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(10L))))
+
+    val batch = microbatchOf(sourceSchema)(
+      Row(1, "ignored-older", 15L, false),
+      Row(1, "newer", 20L, false)
+    )
+
+    exec.execute(batch, batchId = 2L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(resultAuxTable.collect().isEmpty)
+    checkAnswer(resultTargetTable, Row(1, "newer", Row(null, 20L)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler reconciles out-of-order events when ExcludeColumns hides the " +
+    "sequencing column"
+  ) {
+    // ExcludeColumns omits the sequencing column ("seq") and the delete marker ("is_delete")
+    // from persisted rows. The sequencing expression itself still drives CDC reconciliation;
+    // this test verifies that several out-of-order events across six batches converge to the
+    // correct target state without ever materializing those columns.
+    val excludeProcessor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("id")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        deleteCondition = Some(F.col("is_delete")),
+        columnSelection = Some(
+          ColumnSelection.ExcludeColumns(
+            Seq(UnqualifiedColumnName("seq"), UnqualifiedColumnName("is_delete"))
+          )
+        )
+      ),
+      resolvedSequencingType = LongType
+    )
+    val excludeExec = Scd1ForeachBatchHandler(
+      batchProcessor = excludeProcessor,
+      auxiliaryTableIdentifier = defaultAuxTableIdentifier,
+      targetTableIdentifier = defaultTargetTableIdentifier
+    )
+
+    createAuxTable()
+    createTargetTable()
+
+    // Batch 1: highest-seq event in the batch wins on insert.
+    excludeExec.execute(
+      microbatchOf(sourceSchema)(
+        Row(1, "alice", 1L, false),
+        Row(1, "bob", 3L, false)
+      ),
+      batchId = 0L
+    )
+
+    // Batch 2: out-of-order older upsert (seq=2) must not overwrite the live row at seq=3.
+    excludeExec.execute(microbatchOf(sourceSchema)(Row(1, "carol", 2L, false)), batchId = 1L)
+
+    // Batch 3: even-newer upsert wins.
+    excludeExec.execute(microbatchOf(sourceSchema)(Row(1, "dave", 4L, false)), batchId = 2L)
+
+    // Batch 4: out-of-order older delete (seq=2) must not erase the live row at seq=4.
+    excludeExec.execute(microbatchOf(sourceSchema)(Row(1, null, 2L, true)), batchId = 3L)
+
+    val targetTableAfterBatch4 = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(targetTableAfterBatch4, Row(1, "dave", Row(null, 4L)))
+
+    // Batch 5: newer delete (seq=5) wipes the row from the target.
+    excludeExec.execute(microbatchOf(sourceSchema)(Row(1, null, 5L, true)), batchId = 4L)
+
+    // Batch 6: out-of-order pre-delete upsert (seq=4) is suppressed by the tombstone.
+    excludeExec.execute(microbatchOf(sourceSchema)(Row(1, "ghost", 4L, false)), batchId = 5L)
+
+    val auxTableAfterBatch6 = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val targetTableAfterBatch6 = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(targetTableAfterBatch6.collect().isEmpty)
+    checkAnswer(auxTableAfterBatch6, Row(1, Row(5L, null)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler upserts an existing target row when a higher-sequenced event arrives"
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "alice", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(1L))))
+
+    exec.execute(microbatchOf(sourceSchema)(Row(1, "bob", 2L, false)), batchId = 0L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "bob", Row(null, 2L)))
+    assert(resultAuxTable.collect().isEmpty)
+  }
+
+  test(
+    "Scd1ForeachBatchHandler records an aux tombstone for a delete on a nonexistent key without" +
+      " affecting the target"
+  ) {
+    // A delete event for a key that never existed in the target must still be recorded in
+    // the auxiliary table, because a strictly older upsert for the same key arriving in a
+    // later batch must be suppressed by that tombstone.
+    createAuxTable()
+    createTargetTable()
+
+    exec.execute(microbatchOf(sourceSchema)(Row(99, null, 1L, true)), batchId = 0L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    assert(resultTargetTable.collect().isEmpty)
+    checkAnswer(resultAuxTable, Row(99, Row(1L, null)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler ignores a late-arriving upsert with a sequence below the target's" +
+      " last upsert"
+  ) {
+    createAuxTable()
+    createTargetTable(Row(1, "alice", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5L))))
+
+    exec.execute(microbatchOf(sourceSchema)(Row(1, "bob", 2L, false)), batchId = 0L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "alice", Row(null, 5L)))
+    assert(resultAuxTable.collect().isEmpty)
+  }
+
+  test(
+    "Scd1ForeachBatchHandler ignores a late-arriving lower-seq delete but still records the aux" +
+      " tombstone"
+  ) {
+    // The auxiliary table records every incoming delete event regardless of whether it
+    // displaces a target row, so future events at or below the same sequence are filtered
+    // consistently.
+    createAuxTable()
+    createTargetTable(Row(1, "alice", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(5L))))
+
+    exec.execute(microbatchOf(sourceSchema)(Row(1, "alice", 2L, true)), batchId = 0L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "alice", Row(null, 5L)))
+    checkAnswer(resultAuxTable, Row(1, Row(2L, null)))
+  }
+
+  test(
+    "Scd1ForeachBatchHandler resolves a within-batch delete and higher-sequenced upsert as an" +
+      " upsert insert"
+  ) {
+    // Within-batch dedup picks the highest-sequenced event regardless of kind. Here an
+    // upsert at seq=3 beats a delete at seq=2, so the row is inserted into the target and
+    // no auxiliary tombstone is recorded for the per-key winner.
+    createAuxTable()
+    createTargetTable()
+
+    exec.execute(
+      microbatchOf(sourceSchema)(
+        Row(1, "alice", 2L, true),
+        Row(1, "bob", 3L, false)
+      ),
+      batchId = 0L
+    )
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(resultTargetTable, Row(1, "bob", Row(null, 3L)))
+    assert(resultAuxTable.collect().isEmpty)
+  }
+
+  test(
+    "Scd1ForeachBatchHandler treats a composite key as a single identifier and isolates rows by" +
+      " full key"
+  ) {
+    // Composite [country, city] key. Three rows that overlap on country (US, US, UK) but
+    // never on the full key must remain three distinct identities in the target.
+    val compositeSourceSchema = new StructType()
+      .add("country", StringType)
+      .add("city", StringType)
+      .add("population", LongType)
+      .add("seq", LongType)
+      .add("is_delete", BooleanType)
+    val compositeAuxSchema = new StructType()
+      .add("country", StringType)
+      .add("city", StringType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+    val compositeTargetSchema = new StructType()
+      .add("country", StringType)
+      .add("city", StringType)
+      .add("population", LongType)
+      .add(Scd1BatchProcessor.cdcMetadataColName, cdcMetadataColSchemaType())
+
+    val compositeProcessor = Scd1BatchProcessor(
+      changeArgs = ChangeArgs(
+        keys = Seq(UnqualifiedColumnName("country"), UnqualifiedColumnName("city")),
+        sequencing = F.col("seq"),
+        storedAsScdType = ScdType.Type1,
+        deleteCondition = Some(F.col("is_delete")),
+        columnSelection = Some(
+          ColumnSelection.ExcludeColumns(
+            Seq(UnqualifiedColumnName("seq"), UnqualifiedColumnName("is_delete"))
+          )
+        )
+      ),
+      resolvedSequencingType = LongType
+    )
+    val compositeExec = Scd1ForeachBatchHandler(
+      batchProcessor = compositeProcessor,
+      auxiliaryTableIdentifier = defaultAuxTableIdentifier,
+      targetTableIdentifier = defaultTargetTableIdentifier
+    )
+
+    createTable(defaultAuxIdent, defaultAuxTableIdentifier, compositeAuxSchema)
+    createTable(defaultTargetIdent, defaultTargetTableIdentifier, compositeTargetSchema)
+
+    compositeExec.execute(
+      microbatchOf(compositeSourceSchema)(
+        Row("US", "New York", 8000000L, 1L, false),
+        Row("US", "Los Angeles", 4000000L, 1L, false),
+        Row("UK", "London", 9000000L, 1L, false)
+      ),
+      batchId = 0L
+    )
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(
+      resultTargetTable.orderBy("country", "city"),
+      Seq(
+        Row("UK", "London", 9000000L, Row(null, 1L)),
+        Row("US", "Los Angeles", 4000000L, Row(null, 1L)),
+        Row("US", "New York", 8000000L, Row(null, 1L))
+      )
+    )
+    assert(resultAuxTable.collect().isEmpty)
+  }
+
+  test(
+    "Scd1ForeachBatchHandler leaves unrelated target rows untouched when only one key is updated"
+  ) {
+    createAuxTable()
+    createTargetTable(
+      Row(1, "alice", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(1L))),
+      Row(2, "bob", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(1L)))
+    )
+
+    exec.execute(microbatchOf(sourceSchema)(Row(1, "alice-updated", 2L, false)), batchId = 0L)
+
+    val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+    val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+    checkAnswer(
+      resultTargetTable.orderBy("id"),
+      Seq(
+        Row(1, "alice-updated", Row(null, 2L)),
+        Row(2, "bob", Row(null, 1L))
+      )
+    )
+    assert(resultAuxTable.collect().isEmpty)
+  }
+
+  // ===========================================================================================
+  // Case-sensitivity tests
+  // ===========================================================================================
+
+  // A processor that intentionally references columns in UPPERCASE while the suite's source,
+  // auxiliary, and target schemas use lowercase. The case-sensitivity tests below run the
+  // same execute() with this processor under different SQLConf settings to verify the
+  // session's case-sensitivity flag drives every stage of the pipeline.
+  private val mixedCaseProcessor = Scd1BatchProcessor(
+    changeArgs = ChangeArgs(
+      keys = Seq(UnqualifiedColumnName("ID")),
+      sequencing = F.col("SEQ"),
+      storedAsScdType = ScdType.Type1,
+      deleteCondition = Some(F.col("IS_DELETE")),
+      columnSelection = Some(
+        ColumnSelection.IncludeColumns(
+          Seq(UnqualifiedColumnName("ID"), UnqualifiedColumnName("VALUE"))
+        )
+      )
+    ),
+    resolvedSequencingType = LongType
+  )
+
+  private def mixedCaseExec: Scd1ForeachBatchHandler = Scd1ForeachBatchHandler(
+    batchProcessor = mixedCaseProcessor,
+    auxiliaryTableIdentifier = defaultAuxTableIdentifier,
+    targetTableIdentifier = defaultTargetTableIdentifier
+  )
+
+  test(
+    "Scd1ForeachBatchHandler honors case-insensitive analysis from the batch dataframe's session"
+  ) {
+    // Every stage of execute (validation, dedup, project target columns, tombstone
+    // application, merge to aux, merge to target) must resolve the UPPERCASE column refs in
+    // ChangeArgs against the lowercase schema and produce the correct target+aux state.
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      createAuxTable()
+      createTargetTable(Row(1, "alice", cdcMetadataRow(deleteSeq = None, upsertSeq = Some(1L))))
+
+      mixedCaseExec.execute(
+        microbatchOf(sourceSchema)(Row(1, "bob", 2L, false)),
+        batchId = 0L
+      )
+
+      val resultAuxTable = spark.read.table(defaultAuxTableIdentifier.quotedString)
+      val resultTargetTable = spark.read.table(defaultTargetTableIdentifier.quotedString)
+      checkAnswer(resultTargetTable, Row(1, "bob", Row(null, 2L)))
+      assert(resultAuxTable.collect().isEmpty)
+    }
+  }
+
+  test(
+    "Scd1ForeachBatchHandler honors case-sensitive analysis from the batch dataframe's session"
+  ) {
+    // With case-sensitive analysis, the same UPPERCASE ChangeArgs references against a
+    // lowercase schema must not be silently normalized. Execute must surface an
+    // AnalysisException rather than fall back to case-insensitive matching anywhere.
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      createAuxTable()
+      createTargetTable()
+
+      intercept[AnalysisException] {
+        mixedCaseExec.execute(
+          microbatchOf(sourceSchema)(Row(1, "bob", 2L, false)),
+          batchId = 0L
+        )
+      }
+    }
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcAuxiliaryTableSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcAuxiliaryTableSuite.scala
new file mode 100644
index 0000000000000..9fb6070c01e7a
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcAuxiliaryTableSuite.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.SparkFunSuite
+
+/**
+ * Unit tests for the [[AutoCdcAuxiliaryTable]] companion object, in particular the
+ * `serializeKeyColumnNames` / `parseKeyColumnNames` round-trip helpers used to persist the
+ * AutoCDC key column names as a JSON-encoded reserved table property on the auxiliary table.
+ *
+ * These tests are intentionally session-less: the helpers are pure functions on `String` and
+ * `Seq[String]`, and verifying their byte-for-byte round-trip contract requires no Spark
+ * runtime. End-to-end persistence (DDL -> catalog -> SHOW TBLPROPERTIES) is covered by
+ * `AutoCdcScd1AuxiliaryTableDurabilitySuite`; drift-validator behavior over the parsed
+ * property is covered by `AutoCdcScd1KeyDriftSuite`.
+ */
+class AutoCdcAuxiliaryTableSuite extends SparkFunSuite {
+
+  // The drift validator stores key column names in a table property as a JSON array of strings.
+  // These round-trip tests verify that identifier text is preserved verbatim through
+  // serialize -> parse, including characters that JSON itself must escape (`"`, `\`, control
+  // chars) and characters that JSON does not touch but that downstream interpolation might
+  // (`'`, ` `, `.`, backtick). Storage at the table property level is solely the JSON layer's
+  // concern -- SQL identifier quoting (backticks) is never part of the stored bytes.
+
+  private def assertKeyColumnNamesRoundTrip(names: Seq[String]): Unit = {
+    val json = AutoCdcAuxiliaryTable.serializeKeyColumnNames(names)
+    assert(
+      AutoCdcAuxiliaryTable.parseKeyColumnNames(json).contains(names),
+      s"round-trip failed: input=${names}, serialized=${json}"
+    )
+  }
+
+  test("serializeKeyColumnNames/parseKeyColumnNames round-trip preserves plain ASCII names") {
+    assertKeyColumnNamesRoundTrip(Seq("id"))
+    assertKeyColumnNamesRoundTrip(Seq("id", "region"))
+    assertKeyColumnNamesRoundTrip(Seq("id", "region", "country"))
+  }
+
+  test("serializeKeyColumnNames/parseKeyColumnNames round-trip preserves the empty list") {
+    // Empty key sets are not user-reachable (AutoCdcMergeFlow rejects them upstream), but the
+    // helpers themselves must round-trip a `[]` JSON array faithfully.
+    assertKeyColumnNamesRoundTrip(Seq.empty)
+  }
+
+  test("serializeKeyColumnNames/parseKeyColumnNames preserves names containing JSON-escaped " +
+    "characters (quote, backslash, control chars)") {
+    // JSON serializer must escape `"` -> `\"`, `\` -> `\\`, and control chars; the parser
+    // must invert those escapes and yield the original literal bytes.
+    assertKeyColumnNamesRoundTrip(Seq("a\"b"))
+    assertKeyColumnNamesRoundTrip(Seq("a\\b"))
+    assertKeyColumnNamesRoundTrip(Seq("a\nb"))
+    assertKeyColumnNamesRoundTrip(Seq("a\tb"))
+    // Mixed: every JSON-escaped class in a single name.
+    assertKeyColumnNamesRoundTrip(Seq("a\"b\\c\nd"))
+  }
+
+  test("serializeKeyColumnNames/parseKeyColumnNames preserves names containing characters " +
+    "that JSON does not escape (single quote, dot, space, backtick)") {
+    // JSON does not escape these, but they are common in real-world identifiers (especially
+    // when users backtick-quote at the API boundary). They must flow through verbatim.
+    assertKeyColumnNamesRoundTrip(Seq("it's"))
+    assertKeyColumnNamesRoundTrip(Seq("a.b"))
+    assertKeyColumnNamesRoundTrip(Seq("name with spaces"))
+    assertKeyColumnNamesRoundTrip(Seq("a`b"))
+    // Mixed: a single composite key whose pieces collectively touch every "passes verbatim"
+    // class.
+    assertKeyColumnNamesRoundTrip(Seq("it's", "name with spaces", "a.b.c", "back`tick"))
+  }
+
+  test("parseKeyColumnNames returns None for inputs that are not a JSON array of strings") {
+    // None of these are a top-level JSON array of strings; the parser must reject every shape
+    // with `None` so callers can surface a structured INTERNAL_ERROR with consistent wording.
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("not-json").isEmpty)
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("").isEmpty)
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("\"id\"").isEmpty)        // bare string
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("null").isEmpty)
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("{\"id\": 1}").isEmpty)   // object
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("[1, 2, 3]").isEmpty)     // numbers
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("[\"id\", 1]").isEmpty)   // mixed types
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("[\"id\", null]").isEmpty)
+    assert(AutoCdcAuxiliaryTable.parseKeyColumnNames("[[\"id\"]]").isEmpty)    // nested array
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcGraphExecutionTestMixin.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcGraphExecutionTestMixin.scala
new file mode 100644
index 0000000000000..302ff789c12dd
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcGraphExecutionTestMixin.scala
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.scalatest.{BeforeAndAfterEach, Suite}
+
+import org.apache.spark.SparkThrowable
+import org.apache.spark.sql.{Column, Row}
+import org.apache.spark.sql.classic.DataFrame
+import org.apache.spark.sql.connector.catalog.SharedTablesInMemoryRowLevelOperationTableCatalog
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.pipelines.autocdc.{
+  ChangeArgs,
+  ColumnSelection,
+  Scd1BatchProcessor,
+  ScdType,
+  UnqualifiedColumnName
+}
+import org.apache.spark.sql.pipelines.common.RunState
+import org.apache.spark.sql.pipelines.logging.RunProgress
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Shared helpers for AutoCDC end-to-end graph-execution test suites.
+ */
+trait AutoCdcGraphExecutionTestMixin extends BeforeAndAfterEach {
+  self: Suite with ExecutionTest with SharedSparkSession =>
+
+  /** v2 catalog name registered for AutoCDC E2E tests. Tests qualify tables as `cat.ns1.t`. */
+  protected val catalog: String = "cat"
+
+  /** Namespace under [[catalog]] used by AutoCDC E2E tests. */
+  protected val namespace: String = "ns1"
+
+  override protected def beforeEach(): Unit = {
+    super.beforeEach()
+    spark.conf.set(
+      s"spark.sql.catalog.$catalog",
+      classOf[SharedTablesInMemoryRowLevelOperationTableCatalog].getName
+    )
+    // Disable per-flow retries so failure-path tests (e.g. KEY_SCHEMA_DRIFT, INCOMPATIBLE_DATA)
+    // surface the AnalysisException after the first attempt instead of going through the default
+    // 2 retries, which would otherwise emit duplicate FAILED events and inflate test runtime
+    // without changing the asserted outcome.
+    spark.conf.set(SQLConf.PIPELINES_MAX_FLOW_RETRY_ATTEMPTS.key, "0")
+    spark.sql(s"CREATE NAMESPACE IF NOT EXISTS $catalog.$namespace")
+  }
+
+  override protected def afterEach(): Unit = {
+    SharedTablesInMemoryRowLevelOperationTableCatalog.reset()
+    spark.sessionState.catalogManager.reset()
+    spark.sessionState.conf.unsetConf(s"spark.sql.catalog.$catalog")
+    spark.sessionState.conf.unsetConf(SQLConf.PIPELINES_MAX_FLOW_RETRY_ATTEMPTS.key)
+    super.afterEach()
+  }
+
+  /**
+   * Run a pipeline to completion. If any flow emitted a [[RunProgress]] event with state
+   * [[RunState.FAILED]], collect every error from the event buffer and throw a single
+   * exception listing them, so that test failures surface meaningful stack traces instead of
+   * generic "test exited normally but flow failed" errors.
+   */
+  protected def runPipeline(ctx: TestGraphRegistrationContext): Unit = {
+    val updateCtx = TestPipelineUpdateContext(spark, ctx.toDataflowGraph, storageRoot)
+    updateCtx.pipelineExecution.runPipeline()
+    updateCtx.pipelineExecution.awaitCompletion()
+
+    if (updateCtx.eventBuffer.getEvents.exists(_.details == RunProgress(RunState.FAILED))) {
+      val errors = updateCtx.eventBuffer.getEvents.flatMap(_.error)
+      val ex = new RuntimeException(
+        s"Pipeline run failed with ${errors.size} error(s):\n" +
+        errors.map { e =>
+          val stackSnippet = e.getStackTrace
+            .map(f => s"    at $f")
+            .mkString("\n")
+          s"  ${e.getClass.getSimpleName}: ${e.getMessage}\n$stackSnippet"
+        }.mkString("\n")
+      )
+      errors.foreach(ex.addSuppressed)
+      throw ex
+    }
+  }
+
+  /**
+   * Walk every [[Throwable]] reachable from `failure` via [[Throwable#getSuppressed]] and
+   * [[Throwable#getCause]] for the first [[SparkThrowable]] whose
+   * [[SparkThrowable#getCondition]] equals `condition`, then run [[checkError]] against that
+   * exception with all of its other arguments propagated through.
+   */
+  protected def checkErrorInPipelineFailure(
+      failure: Throwable,
+      condition: String,
+      sqlState: Option[String] = None,
+      parameters: Map[String, String] = Map.empty,
+      matchPVals: Boolean = false,
+      queryContext: Array[ExpectedContext] = Array.empty): Unit = {
+
+    def causeChain(t: Throwable): Iterator[Throwable] =
+      Iterator.iterate[Throwable](t)(_.getCause).takeWhile(_ != null)
+
+    def reachable: Iterator[Throwable] =
+      (Iterator(failure) ++ failure.getSuppressed.iterator).flatMap(causeChain)
+
+    val matched = reachable.collectFirst {
+      case t: SparkThrowable if t.getCondition == condition => t
+    }
+    assert(
+      matched.isDefined,
+      s"Expected a SparkThrowable with condition '$condition' reachable from the runPipeline " +
+      s"failure chain, got top-level: ${failure.getMessage}; chain:\n" +
+      reachable
+        .map(t => s"  ${t.getClass.getSimpleName}: ${t.getMessage}")
+        .mkString("\n")
+    )
+    checkError(
+      exception = matched.get,
+      condition = condition,
+      sqlState = sqlState,
+      parameters = parameters,
+      matchPVals = matchPVals,
+      queryContext = queryContext
+    )
+  }
+
+  /**
+   * DDL fragment for the AutoCDC metadata column appended to every SCD1 target table. Use
+   * inside a `CREATE TABLE` statement, for example:
+   *   `CREATE TABLE t (id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)`
+   *
+   * Assumes sequence type is BIGINT (Long).
+   */
+  protected val cdcMetadataDdl: String = {
+    val col = Scd1BatchProcessor.cdcMetadataColName
+    val del = Scd1BatchProcessor.cdcDeleteSequenceFieldName
+    val ups = Scd1BatchProcessor.cdcUpsertSequenceFieldName
+    s"$col STRUCT<$del:BIGINT,$ups:BIGINT> NOT NULL"
+  }
+
+  /**
+   * Insert a pre-existing row into a target table, populating the CDC metadata struct so the
+   * row looks as if a previous AutoCDC run upserted it at sequencing version [[sequence]].
+   *
+   * @param table     Fully-qualified table name (catalog.schema.table).
+   * @param colValues Comma-separated SQL literals for the user-defined columns, in declared
+   *                  order, excluding the trailing CDC metadata column.
+   * @param sequence  Value to seed `_cdc_metadata.upsertSequence` with. The
+   *                  `deleteSequence` field is left NULL.
+   */
+  protected def insertPreloadedRow(table: String, colValues: String, sequence: Long): Unit = {
+    val del = Scd1BatchProcessor.cdcDeleteSequenceFieldName
+    val ups = Scd1BatchProcessor.cdcUpsertSequenceFieldName
+    spark.sql(
+      s"INSERT INTO $table SELECT $colValues, " +
+      s"named_struct('$del', CAST(NULL AS BIGINT), '$ups', CAST($sequence AS BIGINT))"
+    )
+  }
+
+  /** Catalog identifier of the AutoCDC auxiliary table for [[targetTableName]]. */
+  protected def auxTableNameFor(targetTableName: String): String = {
+    val targetIdent = fullyQualifiedIdentifier(targetTableName, Some(catalog), Some(namespace))
+    AutoCdcAuxiliaryTable.identifier(targetIdent).unquotedString
+  }
+
+  /**
+   * Construct an [[AutoCdcFlow]] targeting `catalog.namespace.${target}` from the given
+   * query and CDC knobs.
+   */
+  protected def autoCdcFlow(
+      name: String,
+      target: String,
+      query: FlowFunction,
+      keys: Seq[String],
+      sequencing: Column,
+      columnSelection: Option[ColumnSelection] = None,
+      deleteCondition: Option[Column] = None,
+      scdType: ScdType = ScdType.Type1
+  ): AutoCdcFlow = AutoCdcFlow(
+    identifier = fullyQualifiedIdentifier(name, Some(catalog), Some(namespace)),
+    destinationIdentifier = fullyQualifiedIdentifier(target, Some(catalog), Some(namespace)),
+    func = query,
+    queryContext = QueryContext(
+      currentCatalog = Some(catalog),
+      currentDatabase = Some(namespace)
+    ),
+    origin = QueryOrigin.empty,
+    changeArgs = ChangeArgs(
+      keys = keys.map(UnqualifiedColumnName(_)),
+      sequencing = sequencing,
+      columnSelection = columnSelection,
+      deleteCondition = deleteCondition,
+      storedAsScdType = scdType
+    )
+  )
+
+  /**
+   * Build a single-flow AutoCDC pipeline: a [[TestGraphRegistrationContext]] that registers
+   * `target` under [[catalog]].[[namespace]] and one [[autoCdcFlow]] writing into it from
+   * `sourceDf`. Covers the common single-table/single-flow shape used across the AutoCDC E2E
+   * suites; tests that need multiple flows or non-AutoCDC datasets build the context inline.
+   */
+  protected def singleAutoCdcFlowPipeline(
+      flowName: String,
+      target: String,
+      sourceDf: DataFrame,
+      keys: Seq[String],
+      sequencing: Column,
+      columnSelection: Option[ColumnSelection] = None,
+      deleteCondition: Option[Column] = None,
+      scdType: ScdType = ScdType.Type1): TestGraphRegistrationContext =
+    new TestGraphRegistrationContext(spark) {
+      registerTable(target, catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = flowName,
+        target = target,
+        query = dfFlowFunc(sourceDf),
+        keys = keys,
+        sequencing = sequencing,
+        columnSelection = columnSelection,
+        deleteCondition = deleteCondition,
+        scdType = scdType
+      ))
+    }
+
+  /** Build a target row's `_cdc_metadata` struct value. */
+  protected def cdcMeta(deleteSeq: Option[Long], upsertSeq: Option[Long]): Row =
+    Row(deleteSeq.orNull, upsertSeq.orNull)
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1AuxiliaryTableDurabilitySuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1AuxiliaryTableDurabilitySuite.scala
new file mode 100644
index 0000000000000..3453235cbae84
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1AuxiliaryTableDurabilitySuite.scala
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.functions
+import org.apache.spark.sql.pipelines.autocdc.{
+  ColumnSelection,
+  Scd1BatchProcessor,
+  UnqualifiedColumnName
+}
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Tests covering the durability of AutoCDC's auxiliary table across pipeline runs:
+ * the per-key sequence watermarks recorded in the auxiliary table must persist between
+ * incremental runs, and the auxiliary table must be transparently recreated if it is
+ * deleted out-of-band.
+ */
+class AutoCdcScd1AuxiliaryTableDurabilitySuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  test("a higher-sequence event in a later pipeline run correctly upserts the row") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Single MemoryStream reused across both pipeline runs so the streaming checkpoint can
+    // resume cleanly.
+    val changeDataFeedStream = MemoryStream[(Int, String, Long)]
+    def buildGraphRegistrationContext(): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = changeDataFeedStream.toDF().toDF("id", "name", "version"),
+        keys = Seq("id"),
+        sequencing = functions.col("version"))
+
+    // Run #1: insert id=1 at seq=1.
+    changeDataFeedStream.addData((1, "alice", 1L))
+    runPipeline(buildGraphRegistrationContext())
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", 1L, cdcMeta(None, Some(1L))))
+    )
+
+    // Run #2: upsert id=1 at seq=2 (must replace) and insert id=2 at seq=1 (new key).
+    // The auxiliary table from run #1 persists and continues to gate seq comparisons.
+    changeDataFeedStream.addData((1, "alice2", 2L), (2, "bob", 1L))
+    runPipeline(buildGraphRegistrationContext())
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(
+        Row(1, "alice2", 2L, cdcMeta(None, Some(2L))),
+        Row(2, "bob", 1L, cdcMeta(None, Some(1L)))
+      )
+    )
+  }
+
+  test("an event with a sequence lower than what was applied in a prior pipeline run " +
+    "is suppressed") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Single MemoryStream reused across both runs so the streaming checkpoint can resume.
+    val stream = MemoryStream[(Int, String, Long, Boolean)]
+    def buildCtx(): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = stream.toDF().toDF("id", "name", "version", "is_delete"),
+        keys = Seq("id"),
+        sequencing = functions.col("version"),
+        deleteCondition = Some(functions.col("is_delete") === true),
+        columnSelection = Some(ColumnSelection.ExcludeColumns(
+          Seq(UnqualifiedColumnName("is_delete"))
+        ))
+      )
+
+    // Run #1: delete id=1 at seq=10. Auxiliary table records seq=10 as the watermark.
+    stream.addData((1, "alice", 10L, true))
+    runPipeline(buildCtx())
+    checkAnswer(spark.table(s"$catalog.$namespace.target"), Seq.empty)
+
+    // Run #2: late upsert at seq=5 (< the persisted seq=10 watermark). Must be rejected.
+    stream.addData((1, "stale", 5L, false))
+    runPipeline(buildCtx())
+
+    // Auxiliary table watermark from run #1 (seq=10) should keep rejecting the seq=5 event.
+    checkAnswer(spark.table(s"$catalog.$namespace.target"), Seq.empty)
+  }
+
+  test("the auxiliary table places the AutoCDC key column first, ahead of any non-key " +
+    "source columns") {
+    val session = spark
+    import session.implicits._
+
+    // Source DF column order is (name, id, version): the AutoCDC key column `id` does NOT
+    // appear first in the source DF. The auxiliary table must still write `id` as its
+    // leading column.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(name STRING, id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(String, Int, Long)]
+    stream.addData(("alice", 1, 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream.toDF().toDF("name", "id", "version"),
+      keys = Seq("id"),
+      sequencing = functions.col("version")))
+
+    val auxSchema = spark.table(auxTableNameFor("target")).schema
+
+    // The auxiliary table only contains keys and the metadata column, hence "name" should not be
+    // included.
+    assert(auxSchema.fieldNames.toSeq == Seq("id", Scd1BatchProcessor.cdcMetadataColName))
+    assert(getAuxTableKeyColumnNames(target = "target") == Seq("id"))
+  }
+
+  test("the auxiliary table preserves the user's declared key order, independent of the " +
+    "source DataFrame and target table column orders") {
+    val session = spark
+    import session.implicits._
+
+    // Source DF: (value, id, region, version). Target table: (value, id, region, version,
+    // _cdc_metadata) -- same ordering as the source. The user, however, declares
+    // `keys = Seq("region", "id")` -- the OPPOSITE order from how those columns appear in
+    // both the source DF and the target. The auxiliary table should honor the user's
+    // declared key order, both in the persisted aux schema layout and in the
+    // [[AutoCdcAuxiliaryTable.keyColumnNamesProperty]] property value, so subsequent runs
+    // compare keys against the same recorded layout.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(value STRING, id INT NOT NULL, region STRING NOT NULL, " +
+      s"version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(String, Int, String, Long)]
+    stream.addData(("v", 1, "us", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream.toDF().toDF("value", "id", "region", "version"),
+      keys = Seq("region", "id"),
+      sequencing = functions.col("version")))
+
+    val auxSchema = spark.table(auxTableNameFor("target")).schema
+    assert(auxSchema.fieldNames.toSeq ==
+      Seq("region", "id", Scd1BatchProcessor.cdcMetadataColName))
+    assert(getAuxTableKeyColumnNames(target = "target") == Seq("region", "id"))
+  }
+
+  test("if the AutoCDC auxiliary table is dropped between runs, it is transparently " +
+    "recreated") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Single MemoryStream reused across both runs so the streaming checkpoint can resume.
+    val stream = MemoryStream[(Int, Long)]
+    def buildCtx(): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = stream.toDF().toDF("id", "version"),
+        keys = Seq("id"),
+        sequencing = functions.col("version"))
+
+    stream.addData((1, 1L))
+    runPipeline(buildCtx())
+    assert(spark.catalog.tableExists(auxTableNameFor("target")))
+
+    // Manually drop the auxiliary table.
+    spark.sql(s"DROP TABLE ${auxTableNameFor("target")}")
+    assert(!spark.catalog.tableExists(auxTableNameFor("target")))
+
+    stream.addData((1, 2L))
+    runPipeline(buildCtx())
+
+    // The dropped auxiliary table must be transparently recreated.
+    assert(spark.catalog.tableExists(auxTableNameFor("target")))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, 2L, cdcMeta(None, Some(2L))))
+    )
+  }
+
+  test("auxiliary key-column-names property survives identifiers containing special " +
+    "characters that exercise both JSON and SQL string-literal escaping") {
+    val session = spark
+    import session.implicits._
+
+    // This test exercises the full identifier-text persistence path with composite keys whose
+    // names collectively cover every escape class:
+    //   - `it's`              -- single quote: not escaped by JSON; the writer must double it
+    //                            to `''` to keep the SQL TBLPROPERTIES literal well-formed.
+    //   - `name with spaces`  -- whitespace identifier: backtick-quoted in DDL, no escaping
+    //                            needed in the JSON or the property value.
+    //   - `a"b`               -- literal double quote: JSON escapes as `\"`.
+    //   - `c\d`               -- literal backslash: JSON escapes as `\\`.
+    // If any layer drops, splits, or misescapes a name, the post-run lookup of the
+    // [[AutoCdcAuxiliaryTable.keyColumnNamesProperty]] property either fails to read or
+    // returns a value that is no longer a parseable JSON array of strings.
+    val keyNames = Seq("it's", "name with spaces", "a\"b", "c\\d")
+
+    // SQL DDL identifier rendering: backticks delimit each identifier; an embedded backtick
+    // would have to be escaped by doubling, but none of these names contain one.
+    val targetTableDdl = keyNames
+      .map(name => s"`$name` STRING NOT NULL")
+      .mkString(", ") + s", version BIGINT NOT NULL, $cdcMetadataDdl"
+    spark.sql(s"CREATE TABLE $catalog.$namespace.target ($targetTableDdl)")
+
+    // The AutoCDC API runs every key through `UnqualifiedColumnName.apply`, which calls
+    // `CatalystSqlParser.parseMultipartIdentifier`. To get a single-part identifier whose
+    // text includes special characters, the API caller has to backtick-quote at the boundary;
+    // we mirror that here by wrapping each name in backticks (and doubling any embedded
+    // backtick -- not needed for these names but kept for parity with how a user would call
+    // the API).
+    val backtickQuotedKeys = keyNames.map(name => s"`${name.replace("`", "``")}`")
+
+    // Single MemoryStream reused across both runs so the streaming checkpoint can resume.
+    val stream = MemoryStream[(String, String, String, String, Long)]
+    def buildCtx(): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = stream.toDF().toDF((keyNames :+ "version"): _*),
+        keys = backtickQuotedKeys,
+        sequencing = functions.col("version"))
+
+    // Run #1: a single insert with arbitrary non-empty key values.
+    stream.addData(("v1", "v2", "v3", "v4", 1L))
+    runPipeline(buildCtx())
+
+    // The persisted property must round-trip every name byte-for-byte.
+    assert(getAuxTableKeyColumnNames(target = "target") == keyNames)
+
+    // Run #2: same keys, a higher sequence -- drift validation reads the property back, parses
+    // the JSON, and looks up each recorded name in the aux schema. If any layer mangled the
+    // identifier text (lost an escape, dropped a `'`, split on a `.`, ...), validation would
+    // either throw KEY_SCHEMA_DRIFT (name lookup miss) or INTERNAL_ERROR (recorded name absent
+    // from aux schema). Reaching the second run successfully proves the round-trip works.
+    stream.addData(("v1", "v2", "v3", "v4", 2L))
+    runPipeline(buildCtx())
+
+    // The persisted property is immutable across non-full-refresh runs, so it must still be
+    // intact after run #2.
+    assert(getAuxTableKeyColumnNames(target = "target") == keyNames)
+  }
+
+  private def getAuxTableKeyColumnNames(target: String): Seq[String] = {
+    val auxName = auxTableNameFor(target)
+    val rows = spark.sql(s"SHOW TBLPROPERTIES $auxName").collect()
+    val prop = rows
+      .find(_.getString(0) == AutoCdcAuxiliaryTable.keyColumnNamesProperty)
+      .getOrElse(fail(
+        s"auxiliary table $auxName is missing the " +
+        s"${AutoCdcAuxiliaryTable.keyColumnNamesProperty} property; got: ${rows.toSeq}"
+      ))
+    AutoCdcAuxiliaryTable.parseKeyColumnNames(prop.getString(1))
+      .getOrElse(fail(
+        s"auxiliary table $auxName has a malformed " +
+        s"${AutoCdcAuxiliaryTable.keyColumnNamesProperty} property: '${prop.getString(1)}'"
+      ))
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1FullRefreshSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1FullRefreshSuite.scala
new file mode 100644
index 0000000000000..94ba7e20aed1f
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1FullRefreshSuite.scala
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.functions
+import org.apache.spark.sql.pipelines.autocdc.{
+  ColumnSelection,
+  UnqualifiedColumnName
+}
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Tests covering AutoCDC's full-refresh semantics: full refresh must wipe both the
+ * target rows and the AutoCDC auxiliary table for the refreshed targets, and must leave
+ * non-refreshed targets untouched in selective-refresh mode.
+ */
+class AutoCdcScd1FullRefreshSuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  test("full refresh wipes target rows and the auxiliary table for the refreshed flow") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Run #1: populate target + auxiliary table.
+    val stream1 = MemoryStream[(Int, String, Long)]
+    stream1.addData((1, "alice", 5L))
+    val ctx1 = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream1.toDF().toDF("id", "name", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    runPipeline(ctx1)
+    assert(
+      spark.catalog.tableExists(auxTableNameFor("target")),
+      "Auxiliary table should exist after first run"
+    )
+
+    // Run #2 (full refresh): auxiliary table should be dropped by DatasetManager, target
+    // truncated. The new run brings only id=2 at seq=1.
+    val stream2 = MemoryStream[(Int, String, Long)]
+    stream2.addData((2, "bob", 1L))
+    val ctx2 = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream2.toDF().toDF("id", "name", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    val updateCtx = TestPipelineUpdateContext(
+      spark,
+      ctx2.toDataflowGraph,
+      storageRoot,
+      fullRefreshTables = AllTables
+    )
+    updateCtx.pipelineExecution.runPipeline()
+    updateCtx.pipelineExecution.awaitCompletion()
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(2, "bob", 1L, cdcMeta(None, Some(1L))))
+    )
+  }
+
+  test("after a full refresh, an event with a sequence below the previous run's " +
+    "watermark now lands") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Run #1: delete at seq=10 sets a high watermark in the auxiliary table.
+    val stream1 = MemoryStream[(Int, String, Long, Boolean)]
+    stream1.addData((1, "alice", 10L, true))
+    val ctx1 = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream1.toDF().toDF("id", "name", "version", "is_delete")),
+        keys = Seq("id"),
+        sequencing = functions.col("version"),
+        deleteCondition = Some(functions.col("is_delete") === true),
+        columnSelection = Some(ColumnSelection.ExcludeColumns(
+          Seq(UnqualifiedColumnName("is_delete"))
+        ))
+      ))
+    }
+    runPipeline(ctx1)
+
+    // Run #2 (full refresh): auxiliary table is dropped, watermark reset. seq=5 should
+    // now land.
+    val stream2 = MemoryStream[(Int, String, Long, Boolean)]
+    stream2.addData((1, "fresh", 5L, false))
+    val ctx2 = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream2.toDF().toDF("id", "name", "version", "is_delete")),
+        keys = Seq("id"),
+        sequencing = functions.col("version"),
+        deleteCondition = Some(functions.col("is_delete") === true),
+        columnSelection = Some(ColumnSelection.ExcludeColumns(
+          Seq(UnqualifiedColumnName("is_delete"))
+        ))
+      ))
+    }
+    val updateCtx = TestPipelineUpdateContext(
+      spark,
+      ctx2.toDataflowGraph,
+      storageRoot,
+      fullRefreshTables = AllTables
+    )
+    updateCtx.pipelineExecution.runPipeline()
+    updateCtx.pipelineExecution.awaitCompletion()
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "fresh", 5L, cdcMeta(None, Some(5L))))
+    )
+  }
+
+  test("selective full refresh wipes only the requested target's auxiliary state") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.t_a " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.t_b " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // streamA is replaced across runs because t_a is full-refreshed in run #2 (its streaming
+    // checkpoint is reset by full-refresh, so a fresh source is fine and matches the user-visible
+    // semantics). streamB is reused across runs because t_b is NOT full-refreshed -- its
+    // streaming checkpoint must resume against the same MemoryStream instance, otherwise the
+    // seq=5 assertion below could pass for the wrong reason (the source never produced seq=5
+    // in run #2 instead of the aux watermark suppressing it).
+    val streamA1 = MemoryStream[(Int, Long)]
+    val streamB = MemoryStream[(Int, Long)]
+    streamA1.addData((1, 10L))
+    streamB.addData((1, 10L))
+    val ctx1 = new TestGraphRegistrationContext(spark) {
+      registerTable("t_a", catalog = Some(catalog), database = Some(namespace))
+      registerTable("t_b", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "flow_a",
+        target = "t_a",
+        query = dfFlowFunc(streamA1.toDF().toDF("id", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+      registerFlow(autoCdcFlow(
+        name = "flow_b",
+        target = "t_b",
+        query = dfFlowFunc(streamB.toDF().toDF("id", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    runPipeline(ctx1)
+
+    // Run #2: full refresh ONLY on t_a; t_b's auxiliary state must persist.
+    val streamA2 = MemoryStream[(Int, Long)]
+    streamA2.addData((1, 5L))   // would have been suppressed pre-refresh; now wins
+    streamB.addData((1, 5L))    // must be suppressed (auxiliary table retains seq=10)
+    val ctx2 = new TestGraphRegistrationContext(spark) {
+      registerTable("t_a", catalog = Some(catalog), database = Some(namespace))
+      registerTable("t_b", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "flow_a",
+        target = "t_a",
+        query = dfFlowFunc(streamA2.toDF().toDF("id", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+      registerFlow(autoCdcFlow(
+        name = "flow_b",
+        target = "t_b",
+        query = dfFlowFunc(streamB.toDF().toDF("id", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    val updateCtx = TestPipelineUpdateContext(
+      spark,
+      ctx2.toDataflowGraph,
+      storageRoot,
+      fullRefreshTables = SomeTables(Set(
+        fullyQualifiedIdentifier("t_a", Some(catalog), Some(namespace))
+      ))
+    )
+    updateCtx.pipelineExecution.runPipeline()
+    updateCtx.pipelineExecution.awaitCompletion()
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.t_a"),
+      Seq(Row(1, 5L, cdcMeta(None, Some(5L))))
+    )
+    // t_b: pre-existing seq=10 row still wins; the seq=5 event is dropped.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.t_b"),
+      Seq(Row(1, 10L, cdcMeta(None, Some(10L))))
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1KeyDriftSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1KeyDriftSuite.scala
new file mode 100644
index 0000000000000..fc7706c84e3ee
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1KeyDriftSuite.scala
@@ -0,0 +1,457 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.sql.classic.DataFrame
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.MetadataBuilder
+
+/**
+ * End-to-end tests covering AutoCDC SCD1 key-drift validation: the AutoCDC flow's declared
+ * keys are validated against the auxiliary table's recorded keys at flow execution-init
+ * time. A change in keys across runs without a full refresh corrupts the merge semantics
+ * (rows mis-routed between insert/update); validation detects this and fails fast with a
+ * structured [[AUTOCDC_INVALID_STATE]] error.
+ *
+ * Each test seeds the auxiliary table by running a first pipeline with one set of keys, then
+ * runs a second pipeline with a different shape (new keys, dropped keys, swapped keys, drifted
+ * dataType, or with a tampered auxiliary table) and asserts on the structured failure.
+ */
+class AutoCdcScd1KeyDriftSuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  import testImplicits._
+
+  test("a pipeline execution that adds a key column to an existing AutoCDC flow triggers " +
+    "KEY_SCHEMA_DRIFT") {
+    // Target table carries both candidate key columns up-front so only the AutoCDC `keys`
+    // declaration differs between the two pipelines.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, region STRING NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1 declares one key (`id`). Aux table is created with schema (id, _cdc_metadata).
+    val stream1 = MemoryStream[(Int, String, Long)]
+    stream1.addData((1, "us", 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("id", "region", "version"), Seq("id")))
+
+    // Pipeline #2 declares two keys (`region` + `id`) - arity drift.
+    val stream2 = MemoryStream[(Int, String, Long)]
+    stream2.addData((1, "us", 2L))
+    val ctx2 = buildPipeline(
+      "flow_v2", stream2.toDF().toDF("id", "region", "version"), Seq("region", "id"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow_v2", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        // `region` is nullable here because Scala `String` is a reference type and the
+        // [[MemoryStream]] tuple encoder treats reference types as nullable. Only Scala
+        // primitives (`Int`, `Long`, ...) yield `NOT NULL` columns.
+        "expectedKeySchema" -> "region STRING,id INT NOT NULL",
+        "recordedKeySchema" -> "id INT NOT NULL"
+      )
+    )
+  }
+
+  test("a pipeline execution that drops a key column from an existing AutoCDC flow triggers " +
+    "KEY_SCHEMA_DRIFT") {
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(region STRING NOT NULL, id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1 declares two keys [region, id]. Without strict-equality, the dropped `region`
+    // would slip through with `id` silently matching at position 0 of the recorded schema.
+    val stream1 = MemoryStream[(String, Int, Long)]
+    stream1.addData(("us", 1, 1L))
+    runPipeline(buildPipeline(
+      "flow_v1", stream1.toDF().toDF("region", "id", "version"), Seq("region", "id")))
+
+    // Pipeline #2 declares only [id] - arity drift.
+    val stream2 = MemoryStream[(String, Int, Long)]
+    stream2.addData(("us", 1, 2L))
+    val ctx2 = buildPipeline("flow_v2", stream2.toDF().toDF("region", "id", "version"), Seq("id"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow_v2", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        "expectedKeySchema" -> "id INT NOT NULL",
+        // `region` is nullable here because Scala `String` is a reference type; see the
+        // analogous comment in the "adds a key column" test above.
+        "recordedKeySchema" -> "region STRING,id INT NOT NULL"
+      )
+    )
+  }
+
+  test("a pipeline execution that swaps a key in an existing AutoCDC flow for a different name " +
+    "(same arity) triggers KEY_SCHEMA_DRIFT") {
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, region STRING NOT NULL, country STRING NOT NULL, " +
+      s"version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1 declares [id, region].
+    val stream1 = MemoryStream[(Int, String, String, Long)]
+    stream1.addData((1, "us", "USA", 1L))
+    runPipeline(buildPipeline(
+      "flow_v1", stream1.toDF().toDF("id", "region", "country", "version"), Seq("id", "region")))
+
+    // Pipeline #2 declares [id, country] - same arity, different key set. An arity-only check
+    // would silently match `id` at position 0 and the swapped `region`/`country` would slip
+    // through; the by-name set comparison must catch it.
+    val stream2 = MemoryStream[(Int, String, String, Long)]
+    stream2.addData((1, "us", "USA", 2L))
+    val ctx2 = buildPipeline(
+      "flow_v2", stream2.toDF().toDF("id", "region", "country", "version"), Seq("id", "country"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow_v2", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        // `country` and `region` are nullable here because Scala `String` is a reference type;
+        // see the analogous comment in the "adds a key column" test above.
+        "expectedKeySchema" -> "id INT NOT NULL,country STRING",
+        "recordedKeySchema" -> "id INT NOT NULL,region STRING"
+      )
+    )
+  }
+
+  test("a pipeline whose recorded aux key dataType differs from the flow's source dataType " +
+    "triggers KEY_SCHEMA_DRIFT") {
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"""CREATE TABLE ${auxTableNameFor("target")} (id BIGINT NOT NULL, $cdcMetadataDdl) """ +
+      s"""TBLPROPERTIES ('${AutoCdcAuxiliaryTable.keyColumnNamesProperty}' = '["id"]')"""
+    )
+
+    val stream = MemoryStream[(Int, Long)]
+    stream.addData((1, 1L))
+    val ctx = buildPipeline("flow", stream.toDF().toDF("id", "version"), Seq("id"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        "expectedKeySchema" -> "id INT NOT NULL",
+        "recordedKeySchema" -> "id BIGINT NOT NULL"
+      )
+    )
+  }
+
+  test("a composite key reorder ([a,b] -> [b,a]) does NOT trigger drift validation") {
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(a INT NOT NULL, b STRING NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1 declares keys [a, b] (in that order). Drift validation is order-independent:
+    // the recorded ordering is purely cosmetic for human-readable error messages and must not
+    // gate semantic equivalence, since the merge semantics depend only on the *set* of key
+    // columns and their dataTypes.
+    val stream1 = MemoryStream[(Int, String, Long)]
+    stream1.addData((1, "x", 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("a", "b", "version"), Seq("a", "b")))
+
+    // Pipeline #2 declares the same key set in the reversed order [b, a]. Must NOT throw.
+    val stream2 = MemoryStream[(Int, String, Long)]
+    stream2.addData((2, "y", 1L))
+    runPipeline(buildPipeline("flow_v2", stream2.toDF().toDF("a", "b", "version"), Seq("b", "a")))
+  }
+
+  test("a pipeline execution that changes a key column's nullability or metadata in an " +
+    "existing AutoCDC flow does NOT trigger drift") {
+    // Drift validation compares (name, dataType) pairs as a set. Nullability and column
+    // metadata are part of [[StructField]] but not part of [[DataType]], so they do not gate
+    // semantic equivalence: only the wire-format data type matters for merge correctness.
+    // Target's `id` is nullable so the second pipeline's nullable-`id` source is accepted.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1: source carries `id INT NOT NULL` (Scala primitive `Int`), no metadata.
+    val stream1 = MemoryStream[(Int, Long)]
+    stream1.addData((1, 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("id", "version"), Seq("id")))
+
+    // Pipeline #2: source carries `id INT` (nullable, via `Option[Int]`) AND attaches
+    // non-empty column metadata. Same name and `dataType` as the recorded key, but every
+    // [[StructField]] aspect outside `dataType` differs.
+    val stream2 = MemoryStream[(Option[Int], Long)]
+    stream2.addData((Some(2), 2L))
+    val baseDf = stream2.toDF().toDF("id", "version")
+    val md = new MetadataBuilder()
+      .putString("description", "primary key")
+      .build()
+    val sourceDfWithMetadata = baseDf.select(baseDf("id").as("id", md), baseDf("version"))
+    runPipeline(buildPipeline("flow_v2", sourceDfWithMetadata, Seq("id")))
+  }
+
+  test("a pipeline execution that wraps an existing AutoCDC flow's key in backticks does NOT " +
+    "trigger drift") {
+    // Backticks are a SQL-parse syntactic device, not part of the identifier itself. A user
+    // adding or removing backticks around the same logical column must NOT be detected as drift.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Long)]
+    stream1.addData((1, 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("id", "version"), Seq("id")))
+
+    val stream2 = MemoryStream[(Int, Long)]
+    stream2.addData((2, 1L))
+    runPipeline(buildPipeline("flow_v2", stream2.toDF().toDF("id", "version"), Seq("`id`")))
+  }
+
+  test("a pipeline execution that drops backticks around an existing AutoCDC flow's " +
+    "previously-backtick-quoted key does NOT trigger drift") {
+    // The reverse direction of the previous test: drift validation must be backtick-invariant
+    // on both the WRITE side (recorded property strips backticks when serializing the key
+    // names in pipeline #1) and the READ side (resolver-aware lookup strips backticks when
+    // pipeline #2's expected keys are matched against the recorded set).
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Long)]
+    stream1.addData((1, 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("id", "version"), Seq("`id`")))
+
+    val stream2 = MemoryStream[(Int, Long)]
+    stream2.addData((2, 1L))
+    runPipeline(buildPipeline("flow_v2", stream2.toDF().toDF("id", "version"), Seq("id")))
+  }
+
+  test("under spark.sql.caseSensitive = true, an AutoCDC flow whose key differs only in case " +
+    "from the recorded key triggers KEY_SCHEMA_DRIFT") {
+    // validateNoAutoCdcKeyDrift uses spark.sessionState.conf.resolver, so its behavior on
+    // `Id` vs `id` flips with the session conf. Pin the case-sensitive direction: pipeline #1
+    // seeds the aux table under the default resolver with recorded key `["id"]`, then
+    // pipeline #2 runs under the case-sensitive resolver with key `["Id"]`. Because `Id` and
+    // `id` are distinct identifiers under that resolver, drift validation must fail.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Long)]
+    stream1.addData((1, 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("id", "version"), Seq("id")))
+
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      // Source DF column is `Id` (capital) so the AutoCDC flow's own key-presence check
+      // (`requireKeysPresentInSelectedSchema`) succeeds under case-sensitive analysis.
+      // Drift validation is then the only remaining failure mode and it must fire.
+      val stream2 = MemoryStream[(Int, Long)]
+      stream2.addData((1, 2L))
+      val ctx2 = buildPipeline("flow_v2", stream2.toDF().toDF("Id", "version"), Seq("Id"))
+
+      val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+      checkErrorInPipelineFailure(
+        failure = ex,
+        condition = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+        sqlState = Some("42000"),
+        parameters = Map(
+          "flowName" ->
+            fullyQualifiedIdentifier("flow_v2", Some(catalog), Some(namespace)).unquotedString,
+          "auxTableName" -> auxTableNameFor("target"),
+          "expectedKeySchema" -> "Id INT NOT NULL",
+          "recordedKeySchema" -> "id INT NOT NULL"
+        )
+      )
+    }
+  }
+
+  test("under the default (case-insensitive) resolver, an AutoCDC flow whose key differs only " +
+    "in case from the recorded key does NOT trigger drift") {
+    // Pairs with the case-sensitive test above: same recorded key, but under the default
+    // resolver the two identifiers are equivalent so drift validation must accept pipeline
+    // #2. This pins the negative direction so a regression that accidentally hard-codes a
+    // case-sensitive resolver in the validator is caught.
+    //
+    // Note that only the *key declaration* (`Seq("Id")`) has different casing here -- the
+    // source DF column name still matches the target's `id` exactly. Differing the source DF
+    // column casing as well would not exercise drift: [[SchemaMergingUtils.mergeSchemas]] is
+    // case-sensitive on column names and would add `Id` as a new column to the target,
+    // producing AMBIGUOUS_REFERENCE during the streaming write rather than letting drift
+    // validation make the call.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Long)]
+    stream1.addData((1, 1L))
+    runPipeline(buildPipeline("flow_v1", stream1.toDF().toDF("id", "version"), Seq("id")))
+
+    val stream2 = MemoryStream[(Int, Long)]
+    stream2.addData((1, 2L))
+    runPipeline(buildPipeline("flow_v2", stream2.toDF().toDF("id", "version"), Seq("Id")))
+  }
+
+  test("a pipeline whose aux table is missing the keyColumnNames property fails with " +
+    "AUXILIARY_TABLE_PROPERTY_MISSING") {
+    // Pre-create the aux table directly without the [[keyColumnNamesProperty]] to simulate
+    // corrupt metadata (e.g. user ran `ALTER TABLE ... UNSET TBLPROPERTIES`). Validation must
+    // surface a structured AUTOCDC_INVALID_STATE error rather than silently mis-validating keys.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"CREATE TABLE ${auxTableNameFor("target")} (id INT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, Long)]
+    stream.addData((1, 1L))
+    val ctx = buildPipeline("flow", stream.toDF().toDF("id", "version"), Seq("id"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.AUXILIARY_TABLE_PROPERTY_MISSING",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        "propertyName" -> AutoCdcAuxiliaryTable.keyColumnNamesProperty
+      )
+    )
+  }
+
+  test("a pipeline whose aux table has a malformed keyColumnNames property fails with " +
+    "AUXILIARY_TABLE_PROPERTY_MALFORMED") {
+    // Pre-create the aux table directly with a non-JSON-array property value to simulate
+    // corrupt metadata. Validation must surface a structured AUTOCDC_INVALID_STATE error
+    // rather than letting a parse exception leak.
+    val malformedKeysArray = "not-a-json-array"
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"CREATE TABLE ${auxTableNameFor("target")} (id INT NOT NULL, $cdcMetadataDdl) " +
+      s"TBLPROPERTIES ('${AutoCdcAuxiliaryTable.keyColumnNamesProperty}' = '$malformedKeysArray')"
+    )
+
+    val stream = MemoryStream[(Int, Long)]
+    stream.addData((1, 1L))
+    val ctx = buildPipeline("flow", stream.toDF().toDF("id", "version"), Seq("id"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.AUXILIARY_TABLE_PROPERTY_MALFORMED",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        "propertyName" -> AutoCdcAuxiliaryTable.keyColumnNamesProperty,
+        "rawValue" -> malformedKeysArray
+      )
+    )
+  }
+
+  test("a pipeline whose aux table records a key absent from its schema fails with " +
+    "AUXILIARY_TABLE_KEY_COLUMN_MISSING") {
+    // Pre-create the aux table directly with the [[keyColumnNamesProperty]] pointing at a
+    // column that does not exist in the aux schema. This is either a write-path implementation
+    // bug or external user tampering (e.g. dropping the key column); validation must surface a
+    // structured AUTOCDC_INVALID_STATE error rather than KEY_SCHEMA_DRIFT, because the drift
+    // validator cannot run without resolving every recorded key first.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"""CREATE TABLE ${auxTableNameFor("target")} (id INT NOT NULL, $cdcMetadataDdl) """ +
+      s"""TBLPROPERTIES ('${AutoCdcAuxiliaryTable.keyColumnNamesProperty}' = '["region"]')"""
+    )
+
+    val stream = MemoryStream[(Int, Long)]
+    stream.addData((1, 1L))
+    val ctx = buildPipeline("flow", stream.toDF().toDF("id", "version"), Seq("id"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.AUXILIARY_TABLE_KEY_COLUMN_MISSING",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("target"),
+        "keyColumnName" -> "region",
+        "propertyName" -> AutoCdcAuxiliaryTable.keyColumnNamesProperty
+      )
+    )
+  }
+
+  /**
+   * Build a single-flow pipeline targeting `cat.ns1.target` with the given source DF and key
+   * column list. Thin wrapper over [[singleAutoCdcFlowPipeline]] since every drift test targets
+   * the same `target` table.
+   */
+  private def buildPipeline(
+      flowName: String,
+      sourceDf: DataFrame,
+      keys: Seq[String]): TestGraphRegistrationContext =
+    singleAutoCdcFlowPipeline(
+      flowName = flowName,
+      target = "target",
+      sourceDf = sourceDf,
+      keys = keys,
+      sequencing = $"version")
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1MultiPipelineSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1MultiPipelineSuite.scala
new file mode 100644
index 0000000000000..2100928bc68af
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1MultiPipelineSuite.scala
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * End-to-end tests that exercise interactions between separate AutoCDC pipelines (i.e.
+ * distinct [[DataflowGraph]] / [[TestPipelineUpdateContext]] invocations) sharing the same
+ * v2 catalog. These complement the single-pipeline AutoCDC suites by validating the
+ * boundary semantics between independently-deployed pipelines.
+ *
+ * Each test constructs two graphs and runs them sequentially. In real deployments these
+ * could be two different pipeline definitions writing into the same metastore; the tests
+ * here verify that AutoCDC's per-target catalog state (target table, auxiliary table,
+ * schema invariants) behaves correctly across these pipeline boundaries.
+ */
+class AutoCdcScd1MultiPipelineSuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  test("two AutoCDC pipelines targeting separate tables maintain independent target and " +
+    "auxiliary tables") {
+    val session = spark
+    import session.implicits._
+
+    // Two distinct target tables created up-front.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.t_a " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.t_b " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1 only knows about `t_a`. Its auxiliary table
+    // cat.ns1.__spark_autocdc_aux_state_t_a must not affect pipeline #2's `t_b`.
+    val streamA = MemoryStream[(Int, String, Long)]
+    streamA.addData((1, "alice", 100L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "flow_a",
+      target = "t_a",
+      sourceDf = streamA.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    // Pipeline #2 only knows about `t_b`. Uses a deliberately *lower* sequence to verify
+    // the watermark from pipeline #1's auxiliary table (seq=100) does not leak into
+    // pipeline #2.
+    val streamB = MemoryStream[(Int, String, Long)]
+    streamB.addData((9, "bob", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "flow_b",
+      target = "t_b",
+      sourceDf = streamB.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.t_a"),
+      Seq(Row(1, "alice", 100L, cdcMeta(None, Some(100L))))
+    )
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.t_b"),
+      Seq(Row(9, "bob", 1L, cdcMeta(None, Some(1L))))
+    )
+
+    // Each target has its own auxiliary table; no cross-contamination.
+    assert(spark.catalog.tableExists(auxTableNameFor("t_a")))
+    assert(spark.catalog.tableExists(auxTableNameFor("t_b")))
+  }
+
+  test("a downstream pipeline can read an AutoCDC target written by a different pipeline " +
+    "without observing the CDC metadata column") {
+    val session = spark
+    import session.implicits._
+
+    // Pipeline #1 writes into target `src` via AutoCDC.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.src " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    val stream = MemoryStream[(Int, String, Long)]
+    stream.addData((1, "alice", 1L), (2, "bob", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "writer",
+      target = "src",
+      sourceDf = stream.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    // Pipeline #2 is a regular materialized view that selects the user-data columns from
+    // `src` (a different graph entirely). It must observe the merged AutoCDC rows and be
+    // able to ignore the metadata column without it polluting downstream consumers.
+    val ctxReader = new TestGraphRegistrationContext(spark) {
+      registerMaterializedView(
+        "downstream_mv",
+        query = dfFlowFunc(
+          spark.read.table(s"$catalog.$namespace.src").select("id", "name", "version")
+        )
+      )
+    }
+    runPipeline(ctxReader)
+
+    checkAnswer(
+      spark.table(fullyQualifiedIdentifier("downstream_mv").toString),
+      Seq(Row(1, "alice", 1L), Row(2, "bob", 1L))
+    )
+  }
+
+  test("two AutoCDC pipelines targeting the same table with identical key and data " +
+    "schemas merge into a shared target table") {
+    val session = spark
+    import session.implicits._
+
+    // Target table is created once up-front; both pipelines target it with the same
+    // AutoCDC `keys` and the same source-DF data schema. The two pipelines have distinct
+    // flow names ("flow_v1" / "flow_v2") so they own independent streaming checkpoints,
+    // but share the target table and its auxiliary table.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.shared_target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1: inserts rows with id=1 and id=2 at version=1.
+    val stream1 = MemoryStream[(Int, String, Long)]
+    stream1.addData((1, "alice", 1L), (2, "bob", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "flow_v1",
+      target = "shared_target",
+      sourceDf = stream1.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    // Sanity-check pipeline #1's effect before pipeline #2 runs.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.shared_target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L))),
+        Row(2, "bob", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)))
+      )
+    )
+
+    // Pipeline #2: updates id=2 (existing key) to a higher sequence and inserts id=3
+    // (new key). id=1 is untouched and must survive into the final target unchanged.
+    val stream2 = MemoryStream[(Int, String, Long)]
+    stream2.addData((2, "bob-v2", 2L), (3, "carol", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "flow_v2",
+      target = "shared_target",
+      sourceDf = stream2.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    // Final target: id=1 untouched (pipeline #1's state), id=2 updated by pipeline #2,
+    // id=3 freshly inserted by pipeline #2.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.shared_target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L))),
+        Row(2, "bob-v2", 2L, cdcMeta(deleteSeq = None, upsertSeq = Some(2L))),
+        Row(3, "carol", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)))
+      )
+    )
+
+    // The auxiliary table for the shared target is itself shared across both pipelines.
+    assert(spark.catalog.tableExists(auxTableNameFor("shared_target")))
+  }
+
+  test("two AutoCDC pipelines targeting the same table with the same key but different " +
+    "data columns evolve the shared target schema") {
+    val session = spark
+    import session.implicits._
+
+    // Target is created up-front with pipeline #1's schema only; pipeline #2 brings a new
+    // top-level nullable `age` column that the dataset materialization layer is expected
+    // to schema-merge into the target.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.shared_target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1: source DF schema is (id, name, version); inserts id=1 and id=2.
+    val stream1 = MemoryStream[(Int, String, Long)]
+    stream1.addData((1, "alice", 1L), (2, "bob", 1L))
+    val ctx1 = singleAutoCdcFlowPipeline(
+      flowName = "flow_v1",
+      target = "shared_target",
+      sourceDf = stream1.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version")
+    runPipeline(ctx1)
+
+    // Sanity-check pipeline #1's state before schema evolution kicks in.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.shared_target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L))),
+        Row(2, "bob", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)))
+      )
+    )
+
+    // Pipeline #2: source DF schema is (id, name, age, version). The new nullable `age` column
+    // should be added to the target by dataset materialization; pipeline #1's untouched id=1 row
+    // is backfilled to NULL.
+    val stream2 = MemoryStream[(Int, String, Option[Int], Long)]
+    stream2.addData((2, "bob-v2", Some(25), 2L), (3, "carol", Some(30), 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "flow_v2",
+      target = "shared_target",
+      sourceDf = stream2.toDF().toDF("id", "name", "age", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.shared_target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)), null),
+        Row(2, "bob-v2", 2L, cdcMeta(deleteSeq = None, upsertSeq = Some(2L)), 25),
+        Row(3, "carol", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)), 30)
+      )
+    )
+
+    // Pipeline #1 runs again with its original (id, name, version) schema. The evolved
+    // target schema with `age` must persist: id=1's update leaves age untouched, id=4 is
+    // inserted with age=NULL, and pipeline #2's id=2/id=3 rows are unchanged.
+    stream1.addData((1, "alice-v2", 2L), (4, "dave", 1L))
+    runPipeline(ctx1)
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.shared_target"),
+      Seq(
+        Row(1, "alice-v2", 2L, cdcMeta(deleteSeq = None, upsertSeq = Some(2L)), null),
+        Row(2, "bob-v2", 2L, cdcMeta(deleteSeq = None, upsertSeq = Some(2L)), 25),
+        Row(3, "carol", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)), 30),
+        Row(4, "dave", 1L, cdcMeta(deleteSeq = None, upsertSeq = Some(1L)), null)
+      )
+    )
+  }
+
+  test("a second pipeline targeting an existing AutoCDC table with different keys " +
+    "fails with KEY_SCHEMA_DRIFT") {
+    val session = spark
+    import session.implicits._
+
+    // Target table with both candidate keys present so the second pipeline would otherwise
+    // be schema-compatible with the first; only the AutoCDC `keys` differ between flows.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.shared_target " +
+      s"(id INT NOT NULL, name STRING NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Pipeline #1: AutoCDC flow keyed on `id`. Materializes the auxiliary table with schema
+    // (id, _cdc_metadata).
+    val stream1 = MemoryStream[(Int, String, Long)]
+    stream1.addData((1, "alice", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "flow_v1",
+      target = "shared_target",
+      sourceDf = stream1.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = $"version"))
+
+    // Pipeline #2: completely separate graph, but targets the same physical `shared_target`
+    // table with `keys = Seq("name")`.
+    val stream2 = MemoryStream[(Int, String, Long)]
+    stream2.addData((2, "alice", 1L))
+    val ctx2 = singleAutoCdcFlowPipeline(
+      flowName = "flow_v2",
+      target = "shared_target",
+      sourceDf = stream2.toDF().toDF("id", "name", "version"),
+      keys = Seq("name"),
+      sequencing = $"version")
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_INVALID_STATE.KEY_SCHEMA_DRIFT",
+      sqlState = Some("42000"),
+      parameters = Map(
+        "flowName" ->
+          fullyQualifiedIdentifier("flow_v2", Some(catalog), Some(namespace)).unquotedString,
+        "auxTableName" -> auxTableNameFor("shared_target"),
+        // Pipeline #2's AutoCDC key resolves from the source DF, where `MemoryStream[(Int, String,
+        // Long)]` produces a nullable StringType for `name`.
+        "expectedKeySchema" -> "name STRING",
+        // Pipeline #1 persisted the aux table from a source DF whose `id` was a non-null Scala
+        // primitive (`Int`), so the recorded key carries `NOT NULL`.
+        "recordedKeySchema" -> "id INT NOT NULL"
+      )
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1SchemaEvolutionSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1SchemaEvolutionSuite.scala
new file mode 100644
index 0000000000000..b6c8f2179b7f1
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1SchemaEvolutionSuite.scala
@@ -0,0 +1,670 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import java.sql.Timestamp
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.functions
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.pipelines.autocdc.{
+  ColumnSelection,
+  UnqualifiedColumnName
+}
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Tests covering AutoCDC's interaction with non-key schema evolution across pipeline runs. The
+ * suite documents the supported additive cases (new top-level columns, new nested fields in
+ * array-of-struct, broadening / narrowing column selection) and the cases that fail loudly
+ * today (subtractive nested evolution, type-incompatible changes, case-only renames).
+ *
+ * These behaviors are largely inherited from the lower layers (`SchemaMergingUtils` for
+ * schema merge, the v2 writer's column-resolution layer for nested-field handling) rather
+ * than implemented in AutoCDC itself; the tests here serve as the contract for AutoCDC's
+ * observable behavior on top of those layers.
+ */
+class AutoCdcScd1SchemaEvolutionSuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  test("a nullable non-key column merges correctly with mixed NULL and non-NULL values") {
+    val session = spark
+    import session.implicits._
+
+    // Single MemoryStream with `email` as nullable from the start. Run #1 emits a row with
+    // a NULL email; run #2 emits an upsert with a non-NULL email.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, email STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, String, Option[String], Long)]
+    def buildCtx(): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = stream.toDF().toDF("id", "name", "email", "version"),
+        keys = Seq("id"),
+        sequencing = functions.col("version"))
+
+    // Run #1: insert with NULL email.
+    stream.addData((1, "alice", None, 1L))
+    runPipeline(buildCtx())
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", null, 1L, cdcMeta(None, Some(1L))))
+    )
+
+    // Run #2: upsert with non-NULL email at higher seq replaces the row.
+    stream.addData((1, "alice2", Some("a@x.com"), 2L))
+    runPipeline(buildCtx())
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice2", "a@x.com", 2L, cdcMeta(None, Some(2L))))
+    )
+  }
+
+  test("widening a non-key column's type between runs fails with " +
+    "CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE") {
+    val session = spark
+    import session.implicits._
+
+    // Changing a non-key column's type between pipeline runs is rejected by
+    // `SchemaMergingUtils` with CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE even when the new type
+    // is strictly wider. Users must full-refresh the target to change column types.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, age INT, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Int, Long)]
+    stream1.addData((1, 30, 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream1.toDF().toDF("id", "age", "version"),
+      keys = Seq("id"),
+      sequencing = functions.col("version")))
+
+    // Run #2: widen `age` from Int to Long.
+    val stream2 = MemoryStream[(Int, Long, Long)]
+    stream2.addData((1, 31L, 2L))
+    val ctx2 = singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream2.toDF().toDF("id", "age", "version"),
+      keys = Seq("id"),
+      sequencing = functions.col("version"))
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE",
+      sqlState = Some("42825"),
+      // `left` is the persisted (run #1) INT type; `right` is run #2's widened BIGINT.
+      parameters = Map(
+        "left" -> "\"INT\"",
+        "right" -> "\"BIGINT\""
+      )
+    )
+  }
+
+  test("narrowing a non-key column's type between runs fails with " +
+    "CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE") {
+    val session = spark
+    import session.implicits._
+
+    // Mirror image of the widening test above: changing a non-key column's type between
+    // pipeline runs is rejected by SchemaMergingUtils with CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE
+    // even when the new type is strictly narrower.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, payload BIGINT, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Long, Long)]
+    stream1.addData((1, 100L, 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream1.toDF().toDF("id", "payload", "version"),
+      keys = Seq("id"),
+      sequencing = functions.col("version")))
+
+    // Run #2: narrow `payload` from Long (BIGINT) to Int (INT).
+    val stream2 = MemoryStream[(Int, Int, Long)]
+    stream2.addData((1, 5, 2L))
+    val ctx2 = singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream2.toDF().toDF("id", "payload", "version"),
+      keys = Seq("id"),
+      sequencing = functions.col("version"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE",
+      sqlState = Some("42825"),
+      // `left` is the persisted (run #1) BIGINT type; `right` is run #2's narrowed INT.
+      parameters = Map(
+        "left" -> "\"BIGINT\"",
+        "right" -> "\"INT\""
+      )
+    )
+  }
+
+  test("a new top-level nullable column appearing in the source DF between runs is " +
+    "added to the target") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Single MemoryStream of (id, name, email, version) shared across runs so the streaming
+    // checkpoint can resume cleanly. Run #1's flow drops `email` so the source's resolved DF
+    // schema is 3 columns; run #2 keeps all 4. The MemoryStream's underlying tuple schema is
+    // unchanged (only the downstream projection differs), so the source identity that the
+    // OffsetSeqLog records is stable across runs.
+    val stream = MemoryStream[(Int, String, Option[String], Long)]
+    def buildCtx(includeEmail: Boolean): TestGraphRegistrationContext = {
+      val sourceDf = stream.toDF().toDF("id", "name", "email", "version")
+      val projectedDf = if (includeEmail) sourceDf else sourceDf.drop("email")
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = projectedDf,
+        keys = Seq("id"),
+        sequencing = functions.col("version"))
+    }
+
+    // Run #1: source projects (id, name, version). Target schema is unchanged.
+    stream.addData((1, "alice", None, 1L))
+    runPipeline(buildCtx(includeEmail = false))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", 1L, cdcMeta(None, Some(1L))))
+    )
+
+    // Run #2: source projects (id, name, email, version). mergeSchemas appends `email` to
+    // the target (StructType.merge keeps the left schema's order and appends right-only
+    // fields); existing rows get NULL for the new column.
+    stream.addData((2, "bob", Some("b@x.com"), 2L))
+    runPipeline(buildCtx(includeEmail = true))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(None, Some(1L)), null),
+        Row(2, "bob", 2L, cdcMeta(None, Some(2L)), "b@x.com")
+      )
+    )
+  }
+
+  test("broadening the column selection between runs adds the newly-included column to " +
+    "the target") {
+    val session = spark
+    import session.implicits._
+
+    // Source DF schema is fixed at (id, name, email, version) across both runs. Only the
+    // `columnSelection` knob differs: run #1 includes (id, name, version); run #2 selects
+    // None (= all source columns). mergeSchemas adds `email` to the target via the same
+    // generic SDP path as the new-source-column case, but driven by the
+    // [[ColumnSelection]] knob rather than the source DF's own schema.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, String, String, Long)]
+    def buildCtx(selection: Option[ColumnSelection]): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = stream.toDF().toDF("id", "name", "email", "version"),
+        keys = Seq("id"),
+        sequencing = functions.col("version"),
+        columnSelection = selection)
+
+    // Run #1: only (id, name, version) selected; `email` is dropped before the MERGE.
+    stream.addData((1, "alice", "ignored", 1L))
+    runPipeline(buildCtx(selection = Some(ColumnSelection.IncludeColumns(
+      Seq("id", "name", "version").map(UnqualifiedColumnName(_))
+    ))))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", 1L, cdcMeta(None, Some(1L))))
+    )
+
+    // Run #2: broaden to no selection. mergeSchemas adds `email`; existing rows get NULL,
+    // new rows get the actual value.
+    stream.addData((2, "bob", "b@x.com", 2L))
+    runPipeline(buildCtx(selection = None))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(None, Some(1L)), null),
+        Row(2, "bob", 2L, cdcMeta(None, Some(2L)), "b@x.com")
+      )
+    )
+  }
+
+  test("narrowing the column selection between runs preserves the dropped column on " +
+    "existing rows and leaves it NULL on new rows") {
+    val session = spark
+    import session.implicits._
+
+    // Validates the additive-only column-selection contract on the narrowing side:
+    // tightening `columnSelection` between runs leaves the dropped column in place at the
+    // schema level (SDP's `SchemaMergingUtils.mergeSchemas` is a union, never a subtraction).
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, email STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, String, String, Long)]
+    def buildCtx(selection: Option[ColumnSelection]): TestGraphRegistrationContext =
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = stream.toDF().toDF("id", "name", "email", "version"),
+        keys = Seq("id"),
+        sequencing = functions.col("version"),
+        columnSelection = selection)
+
+    // Run #1: include all columns; populate `email` for key=1.
+    stream.addData((1, "alice", "a@x.com", 1L))
+    runPipeline(buildCtx(selection = None))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", "a@x.com", 1L, cdcMeta(None, Some(1L))))
+    )
+
+    // Run #2: narrow the selection to drop `email`. The merge omits `email` from both
+    // INSERT and UPDATE assignment maps; key=1's `email` is preserved at "a@x.com" while
+    // key=2 is inserted with `email = NULL`.
+    stream.addData((2, "bob", "ignored", 2L))
+    runPipeline(buildCtx(selection = Some(ColumnSelection.IncludeColumns(
+      Seq("id", "name", "version").map(UnqualifiedColumnName(_))
+    ))))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(
+        Row(1, "alice", "a@x.com", 1L, cdcMeta(None, Some(1L))),
+        Row(2, "bob", null, 2L, cdcMeta(None, Some(2L)))
+      )
+    )
+  }
+
+  test("a top-level column dropped from the source DF between runs is preserved on " +
+    "existing rows and left NULL on new rows") {
+    val session = spark
+    import session.implicits._
+
+    // Symmetric to the new-source-column case (which exercises the source DF *gaining* a
+    // column). Validates that the additive-only column-selection contract holds when the
+    // narrowing is driven by the source DF's own schema shrinking, rather than by a
+    // tightening [[ChangeArgs.columnSelection]].
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    // Same `MemoryStream[(Int, String, Option[String], Long)]` shape across runs; runs
+    // differ in whether `email` is kept in the projected source DF.
+    val stream = MemoryStream[(Int, String, Option[String], Long)]
+    def buildCtx(includeEmail: Boolean): TestGraphRegistrationContext = {
+      val sourceDf = stream.toDF().toDF("id", "name", "email", "version")
+      val projectedDf = if (includeEmail) sourceDf else sourceDf.drop("email")
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = projectedDf,
+        keys = Seq("id"),
+        sequencing = functions.col("version"))
+    }
+
+    // Run #1: wide source DF (id, name, email, version). mergeSchemas appends `email` to
+    // the target.
+    stream.addData((1, "alice", Some("a@x.com"), 1L))
+    runPipeline(buildCtx(includeEmail = true))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", 1L, cdcMeta(None, Some(1L)), "a@x.com"))
+    )
+
+    // Run #2: source DF drops `email` upstream of the flow. Target still has `email`
+    // (`StructType.merge` is additive-only); the merge omits `email` from both INSERT and
+    // UPDATE assignment maps. Key=1's `email` is preserved at "a@x.com"; key=2 is inserted
+    // with `email = NULL`.
+    stream.addData((2, "bob", None, 2L))
+    runPipeline(buildCtx(includeEmail = false))
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(
+        Row(1, "alice", 1L, cdcMeta(None, Some(1L)), "a@x.com"),
+        Row(2, "bob", 2L, cdcMeta(None, Some(2L)), null)
+      )
+    )
+  }
+
+  test("dropping a nested struct field between runs fails with INCOMPATIBLE_DATA_FOR_TABLE") {
+    val session = spark
+    import session.implicits._
+
+    // The v2 writer's column-resolution layer requires every nested target field to be
+    // present in the microbatch DF. When run #2's source projection drops `b.c`, the merge
+    // fails with INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA. Users who want to drop a
+    // nested field between runs must full-refresh the target.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(key INT NOT NULL, version BIGINT NOT NULL, " +
+      s"value STRUCT<a:INT,b:STRUCT<c:INT,d:INT>>, $cdcMetadataDdl)"
+    )
+
+    // Stream is (key, version, a, b_c, b_d). Each run reshapes into different `value`
+    // shapes; the underlying tuple shape is unchanged so the streaming source's identity
+    // is stable across runs.
+    val stream = MemoryStream[(Int, Long, Int, Int, Int)]
+    def buildCtx(includeC: Boolean): TestGraphRegistrationContext = {
+      val src = stream.toDF().toDF("key", "version", "a", "b_c", "b_d")
+      val inner = if (includeC) {
+        functions.struct(functions.col("b_c").as("c"), functions.col("b_d").as("d"))
+      } else {
+        functions.struct(functions.col("b_d").as("d"))
+      }
+      val projected = src.select(
+        functions.col("key"),
+        functions.col("version"),
+        functions.struct(functions.col("a"), inner.as("b")).as("value")
+      )
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = projected,
+        keys = Seq("key"),
+        sequencing = functions.col("version"))
+    }
+
+    stream.addData((1, 1L, 1, 1, 1), (2, 1L, 2, 2, 2))
+    runPipeline(buildCtx(includeC = true))
+
+    // Run #2 drops b.c. The v2 writer rejects the merge because it cannot find data for
+    // the target's `value.b.c` column.
+    stream.addData((1, 2L, 10, 99, 10), (3, 1L, 3, 99, 3))
+    val ex = intercept[RuntimeException] { runPipeline(buildCtx(includeC = false)) }
+    // The V2 writer's `TableOutputResolver` produces this error during plan analysis with
+    // an empty `tableName` because the merge plan it analyzes does not carry the target's
+    // catalog identifier through to the resolver call site.
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+      parameters = Map(
+        "tableName" -> "``",
+        "colName" -> "`value`.`b`.`c`"
+      )
+    )
+  }
+
+  test("a new field added inside an array<struct> element between runs is added to the " +
+    "target") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(key INT NOT NULL, version BIGINT NOT NULL, " +
+      s"vals ARRAY<STRUCT<a:INT,b:STRUCT<c:INT>>>, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, Long, Int, Int, Int)]
+    def buildCtx(includeD: Boolean): TestGraphRegistrationContext = {
+      val src = stream.toDF().toDF("key", "version", "a", "b_c", "b_d")
+      val inner = if (includeD) {
+        functions.struct(functions.col("b_c").as("c"), functions.col("b_d").as("d"))
+      } else {
+        functions.struct(functions.col("b_c").as("c"))
+      }
+      val projected = src.select(
+        functions.col("key"),
+        functions.col("version"),
+        functions.array(
+          functions.struct(functions.col("a"), inner.as("b"))
+        ).as("vals")
+      )
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = projected,
+        keys = Seq("key"),
+        sequencing = functions.col("version"))
+    }
+
+    stream.addData((1, 1L, 1, 1, 99))
+    runPipeline(buildCtx(includeD = false))
+
+    // Run #2 widens to include b.d. Existing key=1 row's vals[0].b.d is NULL until the
+    // upsert at version=2 writes the new value.
+    stream.addData((1, 2L, 1, 1, 2), (3, 1L, 3, 3, 3))
+    runPipeline(buildCtx(includeD = true))
+
+    // Inline-explode flattens the array<struct> for assertion.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target")
+        .selectExpr("key", "inline(vals) as (a, b)")
+        .select("key", "a", "b.c", "b.d"),
+      Seq(
+        Row(1, 1, 1, 2),
+        Row(3, 3, 3, 3)
+      )
+    )
+  }
+
+  test("dropping a field inside an array<struct> element between runs fails with " +
+    "INCOMPATIBLE_DATA_FOR_TABLE") {
+    val session = spark
+    import session.implicits._
+
+    // Symmetric to the nested-struct case, but for `array<struct>`. The v2 writer rejects
+    // the merge because it cannot find data for the target's `vals.element.b.d` column
+    // when run #2's projection drops `d` from the element struct. Users must full-refresh
+    // the target to drop a nested array-element field.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(key INT NOT NULL, version BIGINT NOT NULL, " +
+      s"vals ARRAY<STRUCT<a:INT,b:STRUCT<c:INT,d:INT>>>, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, Long, Int, Int, Int)]
+    def buildCtx(includeD: Boolean): TestGraphRegistrationContext = {
+      val src = stream.toDF().toDF("key", "version", "a", "b_c", "b_d")
+      val inner = if (includeD) {
+        functions.struct(functions.col("b_c").as("c"), functions.col("b_d").as("d"))
+      } else {
+        functions.struct(functions.col("b_c").as("c"))
+      }
+      val projected = src.select(
+        functions.col("key"),
+        functions.col("version"),
+        functions.array(
+          functions.struct(functions.col("a"), inner.as("b"))
+        ).as("vals")
+      )
+      singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = projected,
+        keys = Seq("key"),
+        sequencing = functions.col("version"))
+    }
+
+    stream.addData((1, 1L, 1, 1, 1), (2, 1L, 2, 2, 2))
+    runPipeline(buildCtx(includeD = true))
+
+    stream.addData((1, 2L, 10, 10, 99), (3, 1L, 3, 3, 99))
+    val ex = intercept[RuntimeException] { runPipeline(buildCtx(includeD = false)) }
+    // See the nested-struct test above for why `tableName` is empty here.
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA",
+      parameters = Map(
+        "tableName" -> "``",
+        "colName" -> "`vals`.`element`.`b`.`d`"
+      )
+    )
+  }
+
+  test("a source DF column whose name differs from the target only by case fails with " +
+    "AMBIGUOUS_REFERENCE under case-insensitive resolution") {
+    val session = spark
+    import session.implicits._
+
+    // `DatasetManager`'s schema-merge compares the existing target schema and the flow's
+    // output schema *case-sensitively*: `SchemaMergingUtils.mergeSchemas` calls
+    // `StructType.merge` without forwarding the session-level case-sensitivity. When the
+    // target has `value` and the source DF emits `Value`, the merged schema ends up with
+    // both as separate columns. Reference resolution downstream is case-insensitive
+    // (Spark's default), so the MERGE plan trips on the duplicate and reports
+    // AMBIGUOUS_REFERENCE.
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      spark.sql(
+        s"CREATE TABLE $catalog.$namespace.target " +
+        s"(key INT NOT NULL, version BIGINT NOT NULL, value STRING, $cdcMetadataDdl)"
+      )
+
+      val stream = MemoryStream[(Int, Long, String)]
+      stream.addData((1, 1L, "alice"))
+      // Source DF emits `Value` (capital), differing only in case from the target's
+      // `value` column.
+      val df = stream.toDF().toDF("key", "version", "Value")
+      val ctx = singleAutoCdcFlowPipeline(
+        flowName = "auto_cdc_flow",
+        target = "target",
+        sourceDf = df,
+        keys = Seq("key"),
+        sequencing = functions.col("version"))
+
+      val ex = intercept[RuntimeException] { runPipeline(ctx) }
+      // The exact `name` and `referenceNames` parameters depend on internal merge-plan
+      // synthesis; the condition match is the meaningful invariant for this test.
+      checkErrorInPipelineFailure(
+        failure = ex,
+        condition = "AMBIGUOUS_REFERENCE",
+        parameters = Map(
+          "name" -> ".*",
+          "referenceNames" -> ".*"
+        ),
+        matchPVals = true,
+        queryContext = Array(
+          ExpectedContext(
+            fragment = s"`$catalog`.`$namespace`.`target`.`Value`",
+            start = 0,
+            stop = 27
+          )
+        )
+      )
+    }
+  }
+
+  test("extra columns on the target that the AutoCDC flow does not emit are preserved " +
+    "across the merge") {
+    val session = spark
+    import session.implicits._
+
+    // The target is wider than the AutoCDC flow's source DF: column `extra` is present on
+    // the target but never produced by the flow. AutoCDC must tolerate the extra target
+    // column -- pre-existing rows keep their `extra` value, and newly-inserted rows
+    // resolve `extra` to NULL.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, extra INT, $cdcMetadataDdl)"
+    )
+    insertPreloadedRow(
+      s"$catalog.$namespace.target",
+      colValues = "1, 'preloaded', 0, 42",
+      sequence = 0L
+    )
+
+    val stream = MemoryStream[(Int, String, Long)]
+    stream.addData((1, "alice", 1L), (2, "bob", 1L))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream.toDF().toDF("id", "name", "version"),
+      keys = Seq("id"),
+      sequencing = functions.col("version")))
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target").select("id", "name", "version", "extra"),
+      Seq(
+        Row(1, "alice", 1L, 42), // extra preserved on the upsert
+        Row(2, "bob", 1L, null) // extra is NULL for inserts
+      )
+    )
+  }
+
+  test("changing a non-key column type from TIMESTAMP to STRING between runs fails with " +
+    "CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE") {
+    val session = spark
+    import session.implicits._
+
+    // `mergeSchemas` rejects an incompatible type change between TIMESTAMP and STRING.
+    // Captured alongside the type-widening / type-narrowing tests; users must full-refresh
+    // the target to change a column's type.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(key INT NOT NULL, version BIGINT NOT NULL, value TIMESTAMP, $cdcMetadataDdl)"
+    )
+
+    val stream1 = MemoryStream[(Int, Long, Timestamp)]
+    stream1.addData((1, 1L, Timestamp.valueOf("2024-01-01 10:00:00")))
+    runPipeline(singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream1.toDF().toDF("key", "version", "value"),
+      keys = Seq("key"),
+      sequencing = functions.col("version")))
+
+    // Run #2 emits `value` as STRING. mergeSchemas rejects the type change.
+    val stream2 = MemoryStream[(Int, Long, String)]
+    stream2.addData((1, 2L, "2024-01-02 11:00:00"))
+    val ctx2 = singleAutoCdcFlowPipeline(
+      flowName = "auto_cdc_flow",
+      target = "target",
+      sourceDf = stream2.toDF().toDF("key", "version", "value"),
+      keys = Seq("key"),
+      sequencing = functions.col("version"))
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx2) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE",
+      sqlState = Some("42825"),
+      // `left` is the persisted (run #1) TIMESTAMP type; `right` is run #2's STRING.
+      parameters = Map(
+        "left" -> "\"TIMESTAMP\"",
+        "right" -> "\"STRING\""
+      )
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1SinglePipelineSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1SinglePipelineSuite.scala
new file mode 100644
index 0000000000000..f06b8c4615339
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1SinglePipelineSuite.scala
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.functions
+import org.apache.spark.sql.pipelines.autocdc.{
+  ChangeArgs,
+  ColumnSelection,
+  ScdType,
+  UnqualifiedColumnName
+}
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Smoke tests for AutoCDC SCD type 1 flows running within a single pipeline: one
+ * [[DataflowGraph]] / [[TestPipelineUpdateContext]] executes one or more AutoCDC flows,
+ * and the target table contents are asserted at the end. Multi-pipeline scenarios (where
+ * multiple pipelines write to the same target) live in [[AutoCdcScd1MultiPipelineSuite]].
+ */
+class AutoCdcScd1SinglePipelineSuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  test("an upsert event lands a new row in an empty target table") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, String, Long)]
+    stream.addData((1, "alice", 1L))
+
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream.toDF().toDF("id", "name", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+
+    runPipeline(ctx)
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", 1L, cdcMeta(None, Some(1L))))
+    )
+  }
+
+  test("consecutive upsert, delete, and re-upsert events for the same key in one run " +
+    "converge to the latest event") {
+    val session = spark
+    import session.implicits._
+
+    // Target schema deliberately omits `is_delete`: the source carries it as a control
+    // column, drives the deleteCondition, and is excluded from the target projection.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, String, Long, Boolean)]
+    stream.addData(
+      (1, "alice", 1L, false), // initial upsert
+      (1, "alice", 2L, true),  // delete
+      (1, "alice2", 3L, false) // reinsert
+    )
+
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream.toDF().toDF("id", "name", "version", "is_delete")),
+        keys = Seq("id"),
+        sequencing = functions.col("version"),
+        deleteCondition = Some(functions.col("is_delete") === true),
+        columnSelection = Some(ColumnSelection.ExcludeColumns(
+          Seq(UnqualifiedColumnName("is_delete"))
+        ))
+      ))
+    }
+
+    runPipeline(ctx)
+
+    // After all three events at seqs 1, 2, 3: row "alice2" wins as the highest-sequenced
+    // upsert; the delete at seq=2 is superseded by the seq=3 upsert.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice2", 3L, cdcMeta(None, Some(3L))))
+    )
+  }
+
+  test("two AutoCDC flows targeting separate tables in one pipeline produce independent " +
+    "results") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.t_a " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.t_b " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val streamA = MemoryStream[(Int, Long)]
+    val streamB = MemoryStream[(Int, Long)]
+    streamA.addData((1, 1L), (2, 1L))
+    streamB.addData((10, 1L))
+
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("t_a", catalog = Some(catalog), database = Some(namespace))
+      registerTable("t_b", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "flow_a",
+        target = "t_a",
+        query = dfFlowFunc(streamA.toDF().toDF("id", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+      registerFlow(autoCdcFlow(
+        name = "flow_b",
+        target = "t_b",
+        query = dfFlowFunc(streamB.toDF().toDF("id", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    runPipeline(ctx)
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.t_a"),
+      Seq(Row(1, 1L, cdcMeta(None, Some(1L))), Row(2, 1L, cdcMeta(None, Some(1L))))
+    )
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.t_b"),
+      Seq(Row(10, 1L, cdcMeta(None, Some(1L))))
+    )
+    assert(spark.catalog.tableExists(auxTableNameFor("t_a")))
+    assert(spark.catalog.tableExists(auxTableNameFor("t_b")))
+  }
+
+  test("an AutoCDC flow targeting a table whose format does not support row-level " +
+    "operations fails with AUTOCDC_TARGET_DOES_NOT_SUPPORT_MERGE") {
+    val session = spark
+    import session.implicits._
+
+    // Intentionally use a non-merge-compatible catalog, whose default table format is parquet.
+    val catalog = TestGraphRegistrationContext.DEFAULT_CATALOG
+    val database = TestGraphRegistrationContext.DEFAULT_DATABASE
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$database.target_no_merge " +
+      s"(id INT NOT NULL, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+
+    val stream = MemoryStream[(Int, Long)]
+    stream.addData((1, 1L))
+
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("target_no_merge")
+      registerFlow(AutoCdcFlow(
+        identifier = fullyQualifiedIdentifier("auto_cdc_flow"),
+        destinationIdentifier = fullyQualifiedIdentifier("target_no_merge"),
+        func = dfFlowFunc(stream.toDF().toDF("id", "version")),
+        queryContext = QueryContext(
+          currentCatalog = Some(catalog),
+          currentDatabase = Some(database)
+        ),
+        origin = QueryOrigin.empty,
+        changeArgs = ChangeArgs(
+          keys = Seq(UnqualifiedColumnName("id")),
+          sequencing = functions.col("version"),
+          storedAsScdType = ScdType.Type1
+        )
+      ))
+    }
+
+    val ex = intercept[RuntimeException] { runPipeline(ctx) }
+    checkErrorInPipelineFailure(
+      failure = ex,
+      condition = "AUTOCDC_TARGET_DOES_NOT_SUPPORT_MERGE",
+      sqlState = Some("0A000"),
+      parameters = Map(
+        "tableName" -> s"`$catalog`.`$database`.`target_no_merge`",
+        "format" -> "parquet"
+      )
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1TargetTableDurabilitySuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1TargetTableDurabilitySuite.scala
new file mode 100644
index 0000000000000..46f8ee47db02f
--- /dev/null
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/AutoCdcScd1TargetTableDurabilitySuite.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.graph
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.functions
+import org.apache.spark.sql.pipelines.autocdc.Scd1BatchProcessor
+import org.apache.spark.sql.pipelines.utils.{ExecutionTest, TestGraphRegistrationContext}
+import org.apache.spark.sql.test.SharedSparkSession
+
+/**
+ * Tests covering AutoCDC's behavior when the target table is pre-populated by something
+ * other than a prior AutoCDC run: pre-loaded rows, missing CDC metadata column on the
+ * target, and rows with NULL CDC metadata. These cases verify that AutoCDC interoperates
+ * gracefully with users who hand-populate the target table.
+ */
+class AutoCdcScd1TargetTableDurabilitySuite
+    extends ExecutionTest
+    with SharedSparkSession
+    with AutoCdcGraphExecutionTestMixin {
+
+  test("pre-loaded rows: an event with a lower sequence is suppressed and a higher one " +
+    "wins") {
+    val session = spark
+    import session.implicits._
+
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    insertPreloadedRow(s"$catalog.$namespace.target", "1, 'alice', 5", 5L)
+    insertPreloadedRow(s"$catalog.$namespace.target", "2, 'bob', 5", 5L)
+
+    val stream = MemoryStream[(Int, String, Long)]
+    stream.addData(
+      (1, "stale", 2L),  // < pre-existing seq=5 -> ignored
+      (2, "bob2", 10L)   // > pre-existing seq=5 -> upserts
+    )
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream.toDF().toDF("id", "name", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    runPipeline(ctx)
+
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(
+        Row(1, "alice", 5L, cdcMeta(None, Some(5L))),
+        Row(2, "bob2", 10L, cdcMeta(None, Some(10L)))
+      )
+    )
+  }
+
+  test("pre-loaded target rows merge correctly on the first AutoCDC run, and the " +
+    "auxiliary table is created lazily") {
+    val session = spark
+    import session.implicits._
+
+    // Target was populated by some external process; this is the first AutoCDC run.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL, $cdcMetadataDdl)"
+    )
+    insertPreloadedRow(s"$catalog.$namespace.target", "1, 'alice', 1", 1L)
+
+    assert(
+      !spark.catalog.tableExists(auxTableNameFor("target")),
+      "Auxiliary table should not exist before the first AutoCDC run"
+    )
+
+    val stream = MemoryStream[(Int, String, Long)]
+    stream.addData((1, "bob", 2L))
+
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream.toDF().toDF("id", "name", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    runPipeline(ctx)
+
+    // seq=2 > pre-existing seq=1, so "bob" replaces "alice" via the upsert sequence column.
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "bob", 2L, cdcMeta(None, Some(2L))))
+    )
+    assert(
+      spark.catalog.tableExists(auxTableNameFor("target")),
+      "Auxiliary table should be created lazily on the first AutoCDC run"
+    )
+  }
+
+  test("a target table created without the CDC metadata column gets the column " +
+    "auto-added on the first AutoCDC run") {
+    val session = spark
+    import session.implicits._
+
+    // User creates the target without the AutoCDC metadata column. DatasetManager evolves
+    // the existing table schema by merging it with the AutoCdcMergeFlow's output schema,
+    // which includes the metadata column. The first run therefore proceeds normally, and
+    // subsequent reads see the metadata struct alongside the user's data columns.
+    spark.sql(
+      s"CREATE TABLE $catalog.$namespace.target " +
+      s"(id INT NOT NULL, name STRING, version BIGINT NOT NULL)"
+    )
+
+    val stream = MemoryStream[(Int, String, Long)]
+    stream.addData((1, "alice", 1L))
+
+    val ctx = new TestGraphRegistrationContext(spark) {
+      registerTable("target", catalog = Some(catalog), database = Some(namespace))
+      registerFlow(autoCdcFlow(
+        name = "auto_cdc_flow",
+        target = "target",
+        query = dfFlowFunc(stream.toDF().toDF("id", "name", "version")),
+        keys = Seq("id"),
+        sequencing = functions.col("version")
+      ))
+    }
+    runPipeline(ctx)
+
+    val schema = spark.table(s"$catalog.$namespace.target").schema
+    assert(
+      schema.fieldNames.contains(Scd1BatchProcessor.cdcMetadataColName),
+      s"Target must have ${Scd1BatchProcessor.cdcMetadataColName} after first AutoCDC run; " +
+      s"got ${schema.fieldNames.toSeq}"
+    )
+    checkAnswer(
+      spark.table(s"$catalog.$namespace.target"),
+      Seq(Row(1, "alice", 1L, cdcMeta(None, Some(1L))))
+    )
+  }
+}
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectInvalidPipelineSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectInvalidPipelineSuite.scala
index f37716b4a24d3..6eda2afdcdb8a 100644
--- a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectInvalidPipelineSuite.scala
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectInvalidPipelineSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.pipelines.graph
 
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.pipelines.autocdc.{ChangeArgs, ScdType, UnqualifiedColumnName}
 import org.apache.spark.sql.pipelines.utils.{PipelineTest, TestGraphRegistrationContext}
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.{IntegerType, StructType}
@@ -547,4 +548,220 @@ class ConnectInvalidPipelineSuite extends PipelineTest with SharedSparkSession {
     assert(!ex1.getMessage.contains(streamingTableHint))
     assert(ex2.getMessage.contains(streamingTableHint))
   }
+
+  test(
+    "AutoCDC flow targeting a materialized view fails with " +
+    "STREAMING_RELATION_FOR_MATERIALIZED_VIEW"
+  ) {
+    val session = spark
+    import session.implicits._
+
+    val graph = new TestGraphRegistrationContext(spark) {
+      val cdcEvents = MemoryStream[Int].toDF().select($"value" as "id", $"value" as "seq")
+      registerTable(
+        Table(
+          identifier = fullyQualifiedIdentifier("target"),
+          comment = None,
+          specifiedSchema = None,
+          partitionCols = None,
+          clusterCols = None,
+          properties = Map.empty,
+          origin = QueryOrigin.empty,
+          format = Some("parquet"),
+          normalizedPath = None,
+          isStreamingTable = false
+        )
+      )
+      registerFlow(
+        AutoCdcFlow(
+          identifier = fullyQualifiedIdentifier("auto_cdc_flow"),
+          destinationIdentifier = fullyQualifiedIdentifier("target"),
+          func = dfFlowFunc(cdcEvents),
+          queryContext = QueryContext(
+            currentCatalog = Some(TestGraphRegistrationContext.DEFAULT_CATALOG),
+            currentDatabase = Some(TestGraphRegistrationContext.DEFAULT_DATABASE)
+          ),
+          origin = QueryOrigin.empty,
+          changeArgs = ChangeArgs(
+            keys = Seq(UnqualifiedColumnName("id")),
+            sequencing = $"seq",
+            storedAsScdType = ScdType.Type1
+          )
+        )
+      )
+    }.resolveToDataflowGraph()
+
+    val ex = intercept[AnalysisException] {
+      graph.validate()
+    }
+
+    checkError(
+      exception = ex,
+      condition = "INVALID_FLOW_QUERY_TYPE.STREAMING_RELATION_FOR_MATERIALIZED_VIEW",
+      parameters = Map(
+        "flowIdentifier" -> fullyQualifiedIdentifier("auto_cdc_flow").quotedString,
+        "tableIdentifier" -> fullyQualifiedIdentifier("target").quotedString
+      )
+    )
+  }
+
+  test(
+    "AutoCDC flow targeting a persisted view fails with STREAMING_RELATION_FOR_PERSISTED_VIEW"
+  ) {
+    val session = spark
+    import session.implicits._
+
+    val graph = new TestGraphRegistrationContext(spark) {
+      val cdcEvents = MemoryStream[Int].toDF().select($"value" as "id", $"value" as "seq")
+      registerView(
+        PersistedView(
+          identifier = fullyQualifiedIdentifier("target_view"),
+          properties = Map.empty,
+          sqlText = None,
+          comment = None,
+          origin = QueryOrigin.empty
+        )
+      )
+      registerFlow(
+        AutoCdcFlow(
+          identifier = fullyQualifiedIdentifier("target_view"),
+          destinationIdentifier = fullyQualifiedIdentifier("target_view"),
+          func = dfFlowFunc(cdcEvents),
+          queryContext = QueryContext(
+            currentCatalog = Some(TestGraphRegistrationContext.DEFAULT_CATALOG),
+            currentDatabase = Some(TestGraphRegistrationContext.DEFAULT_DATABASE)
+          ),
+          origin = QueryOrigin.empty,
+          changeArgs = ChangeArgs(
+            keys = Seq(UnqualifiedColumnName("id")),
+            sequencing = $"seq",
+            storedAsScdType = ScdType.Type1
+          )
+        )
+      )
+    }.resolveToDataflowGraph()
+
+    val ex = intercept[AnalysisException] {
+      graph.validate()
+    }
+
+    checkError(
+      exception = ex,
+      condition = "INVALID_FLOW_QUERY_TYPE.STREAMING_RELATION_FOR_PERSISTED_VIEW",
+      parameters = Map(
+        "flowIdentifier" -> fullyQualifiedIdentifier("target_view").quotedString,
+        "viewIdentifier" -> fullyQualifiedIdentifier("target_view").quotedString
+      )
+    )
+  }
+
+  test(
+    "AutoCDC flow targeting a temporary view fails with AUTOCDC_RELATION_FOR_TEMPORARY_VIEW"
+  ) {
+    // Temporary views in SDP normally accept either streaming or batch-producing flows, but
+    // AutoCDC flows are an explicit exception: SCD reconciliation only runs at the
+    // streaming-table sink (`Scd1ForeachBatchHandler`), so pointing an AutoCDC flow at a view
+    // would silently drop reconciliation and expose just the projected CDF to consumers.
+    // `validateFlowStreamingness` rejects this case with a dedicated sub-condition under
+    // INVALID_FLOW_QUERY_TYPE.
+    val session = spark
+    import session.implicits._
+
+    val graph = new TestGraphRegistrationContext(spark) {
+      val cdcEvents = MemoryStream[Int].toDF().select($"value" as "id", $"value" as "seq")
+      // A pipeline must contain at least one non-temporary dataset; register an unrelated
+      // streaming table so the pipeline is non-empty and we can exercise the AutoCDC path.
+      registerTable(
+        "dummy_table",
+        query = Some(dfFlowFunc(MemoryStream[Int].toDF()))
+      )
+      registerView(
+        TemporaryView(
+          identifier = fullyQualifiedIdentifier("target_view"),
+          properties = Map.empty,
+          sqlText = None,
+          comment = None,
+          origin = QueryOrigin.empty
+        )
+      )
+      registerFlow(
+        AutoCdcFlow(
+          identifier = fullyQualifiedIdentifier("target_view"),
+          destinationIdentifier = fullyQualifiedIdentifier("target_view"),
+          func = dfFlowFunc(cdcEvents),
+          queryContext = QueryContext(
+            currentCatalog = Some(TestGraphRegistrationContext.DEFAULT_CATALOG),
+            currentDatabase = Some(TestGraphRegistrationContext.DEFAULT_DATABASE)
+          ),
+          origin = QueryOrigin.empty,
+          changeArgs = ChangeArgs(
+            keys = Seq(UnqualifiedColumnName("id")),
+            sequencing = $"seq",
+            storedAsScdType = ScdType.Type1
+          )
+        )
+      )
+    }.resolveToDataflowGraph()
+
+    val ex = intercept[AnalysisException] {
+      graph.validate()
+    }
+
+    checkError(
+      exception = ex,
+      condition = "INVALID_FLOW_QUERY_TYPE.AUTOCDC_RELATION_FOR_TEMPORARY_VIEW",
+      parameters = Map(
+        "flowIdentifier" -> fullyQualifiedIdentifier("target_view").quotedString,
+        "viewIdentifier" -> fullyQualifiedIdentifier("target_view").quotedString
+      )
+    )
+  }
+
+  test("A multiquery table cannot have an AutoCDC query input") {
+    val session = spark
+    import session.implicits._
+
+    val graph = new TestGraphRegistrationContext(spark) {
+      val cdcEvents = MemoryStream[Int].toDF().select($"value" as "id", $"value" as "seq")
+      registerTable("target")
+      registerFlow(
+        AutoCdcFlow(
+          identifier = fullyQualifiedIdentifier("auto_cdc_flow"),
+          destinationIdentifier = fullyQualifiedIdentifier("target"),
+          func = dfFlowFunc(cdcEvents),
+          queryContext = QueryContext(
+            currentCatalog = Some(TestGraphRegistrationContext.DEFAULT_CATALOG),
+            currentDatabase = Some(TestGraphRegistrationContext.DEFAULT_DATABASE)
+          ),
+          origin = QueryOrigin.empty,
+          changeArgs = ChangeArgs(
+            keys = Seq(UnqualifiedColumnName("id")),
+            sequencing = $"seq",
+            storedAsScdType = ScdType.Type1
+          )
+        )
+      )
+      registerFlow(
+        destinationName = "target",
+        name = "extra_flow",
+        query = dfFlowFunc(MemoryStream[Int].toDF().select($"value" as "id", $"value" as "seq"))
+      )
+    }.resolveToDataflowGraph()
+
+    val ex = intercept[AnalysisException] {
+      graph.validate()
+    }
+
+    checkError(
+      exception = ex,
+      condition = "AUTOCDC_MULTIPLE_FLOWS_TO_TARGET",
+      parameters = Map(
+        "tableName" -> fullyQualifiedIdentifier("target").unquotedString,
+        "flows" -> Seq(
+          fullyQualifiedIdentifier("auto_cdc_flow").unquotedString,
+          fullyQualifiedIdentifier("extra_flow").unquotedString
+        ).sorted.mkString(", ")
+      )
+    )
+  }
 }
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectValidPipelineSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectValidPipelineSuite.scala
index 3ac3c09017506..58a6dff709c78 100644
--- a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectValidPipelineSuite.scala
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectValidPipelineSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.plans.logical.Union
 import org.apache.spark.sql.execution.streaming.runtime.MemoryStream
+import org.apache.spark.sql.pipelines.autocdc.{ChangeArgs, ScdType, UnqualifiedColumnName}
 import org.apache.spark.sql.pipelines.utils.{PipelineTest, TestGraphRegistrationContext}
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types._
@@ -406,7 +407,7 @@ class ConnectValidPipelineSuite extends PipelineTest with SharedSparkSession {
       mem.addData(1, 2)
       registerPersistedView("complete-view", query = dfFlowFunc(Seq(1, 2).toDF("x")))
       registerPersistedView("incremental-view", query = dfFlowFunc(mem.toDF()))
-      registerTable("`complete-table`", query = Option(readFlowFunc("complete-view")))
+      registerTable("`complete-table`", query = Option(readFlowFunc("`complete-view`")))
       registerTable("`incremental-table`")
       registerFlow(
         "`incremental-table`",
@@ -509,6 +510,38 @@ class ConnectValidPipelineSuite extends PipelineTest with SharedSparkSession {
     assert(g.flow(TableIdentifier("sink_flow")).isInstanceOf[StreamingFlow])
   }
 
+  test("AutoCdcFlow registers and resolves to AutoCdcMergeFlow") {
+    val session = spark
+    import session.implicits._
+
+    val P = new TestGraphRegistrationContext(spark) {
+      val mem = MemoryStream[Int]
+      val cdcEvents = mem.toDF().select($"value" as "id", $"value" as "seq")
+      registerTable("target")
+      registerFlow(
+        AutoCdcFlow(
+          identifier = fullyQualifiedIdentifier("auto_cdc_flow"),
+          destinationIdentifier = fullyQualifiedIdentifier("target"),
+          func = dfFlowFunc(cdcEvents),
+          queryContext = QueryContext(
+            currentCatalog = Some(TestGraphRegistrationContext.DEFAULT_CATALOG),
+            currentDatabase = Some(TestGraphRegistrationContext.DEFAULT_DATABASE)
+          ),
+          origin = QueryOrigin.empty,
+          changeArgs = ChangeArgs(
+            keys = Seq(UnqualifiedColumnName("id")),
+            sequencing = $"seq",
+            storedAsScdType = ScdType.Type1
+          )
+        )
+      )
+    }
+    val g = P.resolveToDataflowGraph()
+    assert(
+      g.flow(fullyQualifiedIdentifier("auto_cdc_flow")).isInstanceOf[AutoCdcMergeFlow]
+    )
+  }
+
   /** Verifies the [[DataflowGraph]] has the specified [[Flow]] with the specified schema. */
   private def verifyFlowSchema(
       pipeline: DataflowGraph,
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/MaterializeTablesSuite.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/MaterializeTablesSuite.scala
index ecb810dec2911..29d85e9b44397 100644
--- a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/MaterializeTablesSuite.scala
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/MaterializeTablesSuite.scala
@@ -327,6 +327,7 @@ abstract class MaterializeTablesSuite extends BaseCoreExecutionTest {
   }
 
   test("specified schema incompatible with existing table") {
+    implicit val sqlCtx: SQLContext = spark.sqlContext
 
     sql(s"CREATE TABLE ${TestGraphRegistrationContext.DEFAULT_DATABASE}.t6(x BOOLEAN)")
     val catalog = spark.sessionState.catalogManager.currentCatalog.asInstanceOf[TableCatalog]
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/APITest.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/APITest.scala
index c6b457ee04eba..f59994c9490b8 100644
--- a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/APITest.scala
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/APITest.scala
@@ -267,9 +267,7 @@ trait APITest
         name = "transformations/definition.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
-                     |
-                     |spark = SparkSession.active()
+                     |from pyspark.sql import DataFrame
                      |
                      |@dp.append_flow(target = "c", name = "append_to_c")
                      |def flow():
@@ -296,9 +294,7 @@ trait APITest
         name = "transformations/mv.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
-                     |
-                     |spark = SparkSession.active()
+                     |from pyspark.sql import DataFrame
                      |
                      |@dp.materialized_view
                      |def src():
@@ -308,9 +304,7 @@ trait APITest
         name = "transformations/st.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
-                     |
-                     |spark = SparkSession.active()
+                     |from pyspark.sql import DataFrame
                      |
                      |@dp.materialized_view
                      |def a():
@@ -347,9 +341,7 @@ trait APITest
         name = "transformations/definition.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
-                     |
-                     |spark = SparkSession.active()
+                     |from pyspark.sql import DataFrame
                      |
                      |@dp.materialized_view
                      |def a():
@@ -374,6 +366,52 @@ trait APITest
   }
 
   /* Python Language Tests */
+  test("Python Pipeline with explicit spark assignment is backward compatible") {
+    val pipelineSpec =
+      TestPipelineSpec(include = Seq("transformations/**"))
+    val pipelineConfig = TestPipelineConfiguration(pipelineSpec)
+    val sources = Seq(
+      PipelineSourceFile(
+        name = "transformations/definition.py",
+        contents = """
+                     |from pyspark import pipelines as dp
+                     |from pyspark.sql import SparkSession
+                     |
+                     |spark = SparkSession.active()
+                     |
+                     |@dp.materialized_view
+                     |def mv():
+                     |  return spark.range(5)
+                     |""".stripMargin))
+    val pipeline = createAndRunPipeline(pipelineConfig, sources)
+    awaitPipelineTermination(pipeline)
+
+    checkAnswer(spark.sql(s"SELECT * FROM mv"), Seq(Row(0), Row(1), Row(2), Row(3), Row(4)))
+  }
+
+  test("Python Pipeline with spark session placeholder works as expected") {
+    val pipelineSpec =
+      TestPipelineSpec(include = Seq("transformations/**"))
+    val pipelineConfig = TestPipelineConfiguration(pipelineSpec)
+    val sources = Seq(
+      PipelineSourceFile(
+        name = "transformations/definition.py",
+        contents = """
+                     |from pyspark import pipelines as dp
+                     |from pyspark.sql import SparkSession
+                     |
+                     |spark: SparkSession
+                     |
+                     |@dp.materialized_view
+                     |def mv():
+                     |  return spark.range(5)
+                     |""".stripMargin))
+    val pipeline = createAndRunPipeline(pipelineConfig, sources)
+    awaitPipelineTermination(pipeline)
+
+    checkAnswer(spark.sql(s"SELECT * FROM mv"), Seq(Row(0), Row(1), Row(2), Row(3), Row(4)))
+  }
+
   test("Python Pipeline with materialized_view, create_streaming_table, and append_flow") {
     val pipelineSpec =
       TestPipelineSpec(include = Seq("transformations/**"))
@@ -383,9 +421,7 @@ trait APITest
         name = "transformations/st.py",
         contents = s"""
            |from pyspark import pipelines as dp
-           |from pyspark.sql import DataFrame, SparkSession
-           |
-           |spark = SparkSession.active()
+           |from pyspark.sql import DataFrame
            |
            |dp.create_streaming_table(
            |  name = "a",
@@ -401,9 +437,7 @@ trait APITest
         name = "transformations/mv.py",
         contents = s"""
            |from pyspark import pipelines as dp
-           |from pyspark.sql import DataFrame, SparkSession
-           |
-           |spark = SparkSession.active()
+           |from pyspark.sql import DataFrame
            |
            |@dp.materialized_view(
            |  name = "src",
@@ -431,9 +465,7 @@ trait APITest
         name = "transformations/definition.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
-                     |
-                     |spark = SparkSession.active()
+                     |from pyspark.sql import DataFrame
                      |
                      |@dp.temporary_view(
                      | name = "view_1",
@@ -475,9 +507,7 @@ trait APITest
             contents =
               s"""
                  |from pyspark import pipelines as dp
-                 |from pyspark.sql import DataFrame, SparkSession
-                 |
-                 |spark = SparkSession.active()
+                 |from pyspark.sql import DataFrame
                  |
                  |dp.create_sink(
                  |  "mySink",
@@ -518,11 +548,9 @@ trait APITest
         name = "transformations/definition.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
+                     |from pyspark.sql import DataFrame
                      |from pyspark.sql.functions import col
                      |
-                     |spark = SparkSession.active()
-                     |
                      |@dp.materialized_view(partition_cols = ["id_mod"])
                      |def mv():
                      |  return spark.range(5).withColumn("id_mod", col("id") % 2)
@@ -551,11 +579,9 @@ trait APITest
         name = "transformations/definition.py",
         contents = """
                      |from pyspark import pipelines as dp
-                     |from pyspark.sql import DataFrame, SparkSession
+                     |from pyspark.sql import DataFrame
                      |from pyspark.sql.functions import col
                      |
-                     |spark = SparkSession.active()
-                     |
                      |@dp.materialized_view(cluster_by = ["cluster_col1"])
                      |def mv():
                      |  df = spark.range(10)
diff --git a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/TestGraphRegistrationContext.scala b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/TestGraphRegistrationContext.scala
index 9ff92ee895b1d..068171a46aa16 100644
--- a/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/TestGraphRegistrationContext.scala
+++ b/sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/TestGraphRegistrationContext.scala
@@ -17,11 +17,10 @@
 
 package org.apache.spark.sql.pipelines.utils
 
-import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{LocalTempView, PersistedView => PersistedViewType, UnresolvedRelation, ViewType}
 import org.apache.spark.sql.classic.{DataFrame, SparkSession}
-import org.apache.spark.sql.pipelines.graph.{DataflowGraph, FlowAnalysis, FlowFunction, GraphIdentifierManager, GraphRegistrationContext, PersistedView, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, Table, TemporaryView, UnresolvedFlow}
+import org.apache.spark.sql.pipelines.graph.{DataflowGraph, FlowAnalysis, FlowFunction, GraphIdentifierManager, GraphRegistrationContext, PersistedView, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, Table, TemporaryView, UnresolvedFlow, UntypedFlow}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
@@ -29,7 +28,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
  * A test class to simplify the creation of pipelines and datasets for unit testing.
  */
 class TestGraphRegistrationContext(
-    val _spark: SparkSession,
+    val spark: SparkSession,
     val sqlConf: Map[String, String] = Map.empty)
     extends GraphRegistrationContext(
       defaultCatalog = TestGraphRegistrationContext.DEFAULT_CATALOG,
@@ -37,9 +36,8 @@ class TestGraphRegistrationContext(
       defaultSqlConf = sqlConf
     ) {
 
-  /** Re-expose as implicit so nested anonymous classes can use it without shadowing issues */
-  implicit def spark: SparkSession = _spark
-  implicit def sqlContext: SQLContext = _spark.sqlContext
+  /** Expose all registered flows for tests */
+  def getFlows: List[UnresolvedFlow] = flows.toList
 
   // scalastyle:off
   // Disable scalastyle to ignore argument count.
@@ -150,7 +148,7 @@ class TestGraphRegistrationContext(
     val qualifiedIdentifier = GraphIdentifierManager
           .parseAndQualifyTableIdentifier(
             rawTableIdentifier = GraphIdentifierManager
-              .parseTableIdentifier(name, _spark),
+              .parseTableIdentifier(name, spark),
             currentCatalog = catalog.orElse(Some(defaultCatalog)),
             currentDatabase = database.orElse(Some(defaultDatabase)))
           .identifier
@@ -176,7 +174,7 @@ class TestGraphRegistrationContext(
 
     if (query.isDefined) {
       registerFlow(
-        new UnresolvedFlow(
+        UntypedFlow(
           identifier = qualifiedIdentifier,
           destinationIdentifier = qualifiedIdentifier,
           func = query.get,
@@ -267,7 +265,7 @@ class TestGraphRegistrationContext(
     )
 
     registerFlow(
-      new UnresolvedFlow(
+      UntypedFlow(
         identifier = viewIdentifier,
         destinationIdentifier = viewIdentifier,
         func = query,
@@ -309,9 +307,9 @@ class TestGraphRegistrationContext(
       catalog: Option[String] = None,
       database: Option[String] = None
   ): Unit = {
-    val rawFlowIdentifier = GraphIdentifierManager.parseTableIdentifier(name, _spark)
+    val rawFlowIdentifier = GraphIdentifierManager.parseTableIdentifier(name, spark)
     val rawDestinationIdentifier =
-      GraphIdentifierManager.parseTableIdentifier(destinationName, _spark)
+      GraphIdentifierManager.parseTableIdentifier(destinationName, spark)
 
     val flowWritesToView = getViews
         .filter(_.isInstanceOf[TemporaryView])
@@ -339,7 +337,7 @@ class TestGraphRegistrationContext(
       }
 
     registerFlow(
-      new UnresolvedFlow(
+      UntypedFlow(
         identifier = flowIdentifier,
         destinationIdentifier = flowDestinationIdentifier,
         func = query,
@@ -360,19 +358,31 @@ class TestGraphRegistrationContext(
   /**
    * Creates a flow function from a logical plan that reads from a table with the given name.
    */
-  def readFlowFunc(name: String): FlowFunction = {
-    FlowAnalysis.createFlowFunctionFromLogicalPlan(UnresolvedRelation(TableIdentifier(name)))
+  def readFlowFunc(
+       name: String,
+       extraOptions: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()
+  ): FlowFunction = {
+    FlowAnalysis.createFlowFunctionFromLogicalPlan(
+      UnresolvedRelation(
+        tableIdentifier = GraphIdentifierManager.parseTableIdentifier(name, spark),
+        extraOptions = extraOptions,
+        isStreaming = false
+      )
+    )
   }
 
   /**
    * Creates a flow function from a logical plan that reads a stream from a table with the given
    * name.
    */
-  def readStreamFlowFunc(name: String): FlowFunction = {
+  def readStreamFlowFunc(
+       name: String,
+       extraOptions: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()
+  ): FlowFunction = {
     FlowAnalysis.createFlowFunctionFromLogicalPlan(
       UnresolvedRelation(
-        TableIdentifier(name),
-        extraOptions = CaseInsensitiveStringMap.empty(),
+        tableIdentifier = GraphIdentifierManager.parseTableIdentifier(name, spark),
+        extraOptions = extraOptions,
         isStreaming = true
       )
     )
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 2e1821eff791e..6ca2ecb302f0d 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/tools/pom.xml b/tools/pom.xml
index c040deddf4eeb..7630f7875ed21 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
diff --git a/udf/worker/README.md b/udf/worker/README.md
index fa27430b62b62..b843c430d0e04 100644
--- a/udf/worker/README.md
+++ b/udf/worker/README.md
@@ -5,44 +5,162 @@ Package structure for the UDF worker framework described in
 
 ## Overview
 
-Spark processes a UDF by first obtaining a **WorkerDispatcher** from the worker
-specification (plus context such as security scope). The dispatcher manages the
-actual worker processes behind the scenes -- pooling, reuse, and termination are
-all invisible to Spark.
+Spark processes a UDF by obtaining a **WorkerDispatcher** from a worker
+specification. The dispatcher manages workers behind the scenes. From
+the dispatcher, Spark gets a **WorkerSession** -- one per UDF invocation --
+with an Iterator-to-Iterator `process` API that streams input batches
+through the worker and returns result batches.
 
-From the dispatcher, Spark gets a **WorkerSession**, which represents one single
-UDF execution and can carry per-execution state. A WorkerSession is not 1-to-1
-mapped to an actual worker -- multiple sessions may share the same underlying
-worker when it is reused. Worker reuse is managed by each dispatcher
-implementation based on the worker specification.
+```
+UDFWorkerSpecification   -- how to create and configure workers
+    |
+    v
+WorkerDispatcher      -- manages workers, creates sessions
+    |
+    v
+WorkerSession         -- one UDF execution
+    |  1. session.init(InitMessage(payload, inputSchema, outputSchema))
+    |  2. val results = session.process(inputBatches)
+    |  3. session.close()
+```
+
+How workers are created depends on the dispatcher implementation. The
+framework currently provides **direct worker creation** (local OS
+processes) and is designed for future **indirect creation** (via a
+provisioning service or daemon).
 
 ## Sub-packages
 
 ```
 udf/worker/
-├── proto/  Protobuf definition of the worker specification
-│           (UDFWorkerSpecification).
-│           WorkerSpecification   -- typed Scala wrapper around the protobuf spec.
-└── core/   Engine-side APIs (all @Experimental):
-              WorkerDispatcher      -- manages workers for one spec; creates sessions.
-              WorkerSession         -- represents one single UDF execution.
-              WorkerSecurityScope   -- security boundary for connection pooling.
+├── proto/
+│     worker_spec.proto           -- UDFWorkerSpecification protobuf (+ generated Java classes)
+│     common.proto                -- shared enums (UDFWorkerDataFormat, etc.)
+│
+└── core/                         -- abstract interfaces
+      WorkerDispatcher.scala      -- creates sessions, manages worker lifecycle
+      WorkerSession.scala         -- per-UDF init/process/cancel/close + InitMessage
+      WorkerConnection.scala      -- transport channel abstraction
+      WorkerSecurityScope.scala   -- security boundary for worker pooling
+      │
+      └── direct/                 -- "direct" creation: local OS processes
+            DirectWorkerDispatcher.scala  -- spawns processes, env lifecycle
+            DirectWorkerProcess.scala     -- OS process + connection + UDS socket
+            DirectWorkerSession.scala     -- session backed by a direct process
+```
+
+The `core/` package defines abstract interfaces that are independent of how
+workers are created. The `core/direct/` sub-package implements "direct"
+worker creation where Spark spawns local OS processes. Future packages
+(e.g., `core/indirect/`) can implement alternative creation modes such as
+obtaining workers from a provisioning service or daemon.
+
+### Direct worker creation
+
+`DirectWorkerDispatcher` spawns worker processes locally. On the first
+session, it runs the optional environment lifecycle callables from the
+`UDFWorkerSpecification`:
+
+- **`environmentVerification`** -- checks if the environment is ready
+  (exit 0 = ready). When it succeeds, installation is skipped.
+- **`installation`** -- prepares the environment (installs runtime,
+  dependencies, worker binaries). Only runs when verification is absent
+  or fails.
+- **`environmentCleanup`** -- runs after the dispatcher is closed or on
+  JVM shutdown to clean up temporary resources.
+
+Environment setup runs **once per dispatcher** (not per session).
+Workers are terminated via SIGTERM/SIGKILL when the dispatcher is closed.
+
+## Basic usage (Scala)
+
+```scala
+import org.apache.spark.udf.worker.{
+  DirectWorker, ProcessCallable, UDFProtoCommunicationPattern,
+  UDFWorkerDataFormat, UDFWorkerProperties, UDFWorkerSpecification,
+  UnixDomainSocket, WorkerCapabilities, WorkerConnectionSpec, WorkerEnvironment}
+import org.apache.spark.udf.worker.core._
+
+// 1. Define a worker spec (direct creation mode).
+val spec = UDFWorkerSpecification.newBuilder()
+  .setEnvironment(WorkerEnvironment.newBuilder()
+    .setEnvironmentVerification(ProcessCallable.newBuilder()
+      .addCommand("python").addCommand("-c").addCommand("import my_udf_worker").build())
+    .setInstallation(ProcessCallable.newBuilder()
+      .addCommand("pip").addCommand("install").addCommand("my_udf_worker").build())
+    .build())
+  .setCapabilities(WorkerCapabilities.newBuilder()
+    .addSupportedDataFormats(UDFWorkerDataFormat.ARROW)
+    .addSupportedCommunicationPatterns(
+      UDFProtoCommunicationPattern.BIDIRECTIONAL_STREAMING)
+    .build())
+  .setDirect(DirectWorker.newBuilder()
+    .setRunner(ProcessCallable.newBuilder()
+      .addCommand("python").addCommand("-m").addCommand("my_udf_worker").build())
+    .setProperties(UDFWorkerProperties.newBuilder()
+      .setConnection(WorkerConnectionSpec.newBuilder()
+        .setUnixDomainSocket(UnixDomainSocket.getDefaultInstance).build())
+      .build())
+    .build())
+  .build()
+
+// 2. Create a dispatcher. Use a protocol-specific subclass of
+//    DirectWorkerDispatcher (e.g., gRPC over UDS).
+val dispatcher: WorkerDispatcher = ...
+
+// 3. Create a session for one UDF execution.
+val session = dispatcher.createSession(securityScope = None)
+try {
+  // 4. Initialize with the serialized function and schemas.
+  session.init(InitMessage(
+    functionPayload = serializedFunction,
+    inputSchema = arrowInputSchema,
+    outputSchema = arrowOutputSchema))
+
+  // 5. Process data -- Iterator in, Iterator out.
+  val results: Iterator[Array[Byte]] =
+    session.process(inputBatches)
+
+  // Consume results lazily.
+  results.foreach(processResultBatch)
+} finally {
+  session.close()
+}
+
+// 6. Shut down all workers.
+dispatcher.close()
 ```
 
 ## Build
 
 SBT:
 ```
-build/sbt "udf-worker-core/compile"
-build/sbt "udf-worker-core/test"
+build/sbt "udf-worker-proto/compile" "udf-worker-core/compile"
 ```
 
 Maven:
 ```
-./build/mvn -pl udf/worker/proto,udf/worker/core -am compile
-./build/mvn -pl udf/worker/proto,udf/worker/core -am test
+build/mvn compile -pl udf/worker/proto,udf/worker/core -am
 ```
 
+## Test
+
+SBT:
+```
+build/sbt "udf-worker-core/test"
+```
+
+## Current status
+
+This is the **first MVP** providing the core abstraction layer and the
+direct worker dispatcher.
+The following are left as TODOs:
+
+- **Connection pooling** -- reuse workers across sessions
+- **Security scope isolation** -- partition pools by `WorkerSecurityScope`
+- **Indirect worker creation** -- obtain workers from a service or daemon
+- **Protocol-specific implementations** -- e.g., gRPC over UDS
+
 ## Design references
 
 * [SPIP Language-agnostic UDF Protocol for Spark](https://docs.google.com/document/d/19Whzq127QxVt2Luk0EClgaDtcpBsFUp67NcVdKKyPF8/edit?tab=t.0)
diff --git a/udf/worker/core/pom.xml b/udf/worker/core/pom.xml
index 3ef1ded37ba3a..f09e8b722ec46 100644
--- a/udf/worker/core/pom.xml
+++ b/udf/worker/core/pom.xml
@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/UnixSocketWorkerConnection.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/UnixSocketWorkerConnection.scala
new file mode 100644
index 0000000000000..b3b40d16e7443
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/UnixSocketWorkerConnection.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core
+
+import java.io.File
+
+import org.apache.spark.annotation.Experimental
+
+/**
+ * :: Experimental ::
+ * A [[WorkerConnection]] over a Unix domain socket. Owns the socket
+ * path and removes the socket file on [[close]]. Subclasses provide the
+ * protocol-specific channel (e.g. gRPC over UDS) and may override
+ * [[close]] to add transport-level shutdown -- they should call
+ * `super.close()` to ensure the socket file is removed.
+ *
+ * [[close]] is idempotent: deleting an already-removed file is a no-op.
+ */
+@Experimental
+abstract class UnixSocketWorkerConnection(val socketPath: String)
+  extends WorkerConnection {
+
+  override def close(): Unit = {
+    val f = new File(socketPath)
+    if (f.exists()) f.delete()
+  }
+}
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerConnection.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerConnection.scala
new file mode 100644
index 0000000000000..82b2fff8df585
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerConnection.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core
+
+import org.apache.spark.annotation.Experimental
+
+/**
+ * :: Experimental ::
+ * A transport-level connection to a running UDF worker process.
+ *
+ * A [[WorkerConnection]] represents the communication channel between the
+ * Spark engine and a single worker process (e.g., a gRPC channel over a
+ * Unix domain socket, or a raw TCP socket). It is owned by a worker
+ * process wrapper (e.g., [[direct.DirectWorkerProcess]]) and shared
+ * across all [[WorkerSession]]s that use that process.
+ *
+ * One connection, many sessions: the worker exposes a single server-side
+ * endpoint that all sessions share. For gRPC, per-session work lives on
+ * multiplexed streams over this channel.
+ *
+ * Implementations expose only lifecycle. Data transmission happens at
+ * the [[WorkerSession]] level -- this class is solely about whether the
+ * channel is open.
+ *
+ * '''Relationship to other classes (direct creation mode):'''
+ * {{{
+ *   DirectWorkerProcess  1 --- 1  WorkerConnection   (transport over UDS)
+ *   DirectWorkerProcess  1 --- *  WorkerSession      (UDF executions)
+ * }}}
+ */
+@Experimental
+abstract class WorkerConnection extends AutoCloseable {
+  /** Returns true if the underlying transport channel is still usable. */
+  def isActive: Boolean
+}
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerDispatcher.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerDispatcher.scala
index 58fabbaea00df..008cfc2993a09 100644
--- a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerDispatcher.scala
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerDispatcher.scala
@@ -17,11 +17,11 @@
 package org.apache.spark.udf.worker.core
 
 import org.apache.spark.annotation.Experimental
-import org.apache.spark.udf.worker.WorkerSpecification
+import org.apache.spark.udf.worker.UDFWorkerSpecification
 
 /**
  * :: Experimental ::
- * Manages workers for a single [[WorkerSpecification]] and hides worker details from Spark.
+ * Manages workers for a single [[UDFWorkerSpecification]] and hides worker details from Spark.
  *
  * A [[WorkerDispatcher]] is created from a worker specification (plus context such
  * as security scope). It owns the underlying worker processes and connections,
@@ -31,7 +31,7 @@ import org.apache.spark.udf.worker.WorkerSpecification
 @Experimental
 trait WorkerDispatcher extends AutoCloseable {
 
-  def workerSpec: WorkerSpecification
+  def workerSpec: UDFWorkerSpecification
 
   /**
    * Creates a [[WorkerSession]] that maps to one single UDF execution.
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerLogger.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerLogger.scala
new file mode 100644
index 0000000000000..a8f135f688908
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerLogger.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core
+
+import org.apache.spark.annotation.Experimental
+
+/**
+ * :: Experimental ::
+ * Minimal logging surface used by the udf/worker framework.
+ *
+ * The framework deliberately does not depend on SLF4J (or any other
+ * concrete logging backend) so callers can embed it without dragging a
+ * specific logger onto the classpath. Embedders should supply an
+ * adapter that forwards to their preferred backend (Spark's `Logging`
+ * trait, SLF4J, java.util.logging, etc.).
+ *
+ * Only the methods actually used by the framework are exposed.
+ * Messages are passed by-name so the formatting cost is avoided when
+ * the backend decides to drop the event.
+ */
+@Experimental
+trait WorkerLogger {
+  def warn(msg: => String): Unit
+  def warn(msg: => String, t: Throwable): Unit
+  def debug(msg: => String): Unit
+  def debug(msg: => String, t: Throwable): Unit
+}
+
+object WorkerLogger {
+  /** Discards all messages. Default for callers that don't wire up logging. */
+  val NoOp: WorkerLogger = new WorkerLogger {
+    override def warn(msg: => String): Unit = ()
+    override def warn(msg: => String, t: Throwable): Unit = ()
+    override def debug(msg: => String): Unit = ()
+    override def debug(msg: => String, t: Throwable): Unit = ()
+  }
+}
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerSession.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerSession.scala
index 83c392a895b66..f4c4091688c94 100644
--- a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerSession.scala
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/WorkerSession.scala
@@ -16,22 +16,133 @@
  */
 package org.apache.spark.udf.worker.core
 
+import java.util.concurrent.atomic.AtomicBoolean
+
 import org.apache.spark.annotation.Experimental
 
 /**
  * :: Experimental ::
- * Represents one single UDF execution.
+ * Carries all information needed to initialize a UDF execution on a worker.
+ *
+ * This message is passed to [[WorkerSession#init]] and contains the function
+ * definition, schemas, and any additional configuration.
  *
- * A [[WorkerSession]] is obtained from [[WorkerDispatcher#createSession]] and
- * can carry per-execution state for that UDF invocation. Implementations may
- * add concrete data-processing methods and lifecycle hooks as needed.
+ * Placeholder: will be replaced by a generated proto message once the
+ * UDF wire protocol lands. Do not rely on case-class equality --
+ * `Array[Byte]` fields compare by reference.
  *
- * A WorkerSession is not 1-to-1 mapped to an actual worker process. Multiple
- * WorkerSessions may be backed by the same worker when the worker is reused.
- * Worker reuse and pooling are managed by each [[WorkerDispatcher]]
- * implementation based on the [[WorkerSpecification]].
+ * @param functionPayload serialized function (e.g., pickled Python, JVM bytes)
+ * @param inputSchema     serialized input schema (e.g., Arrow schema bytes)
+ * @param outputSchema    serialized output schema (e.g., Arrow schema bytes)
+ * @param properties      additional key-value configuration. Can carry
+ *                        protocol-specific or engine-specific metadata that
+ *                        does not yet have a dedicated field.
+ */
+@Experimental
+case class InitMessage(
+    functionPayload: Array[Byte],
+    inputSchema: Array[Byte],
+    outputSchema: Array[Byte],
+    properties: Map[String, String] = Map.empty)
+
+/**
+ * :: Experimental ::
+ * One UDF execution on a worker -- the main interface Spark uses to run UDFs.
+ *
+ * A [[WorkerSession]] is the '''per-UDF-invocation''' handle that Spark
+ * obtains from [[WorkerDispatcher#createSession]]. It carries the full
+ * init / data-stream / finish lifecycle for a single UDF evaluation.
+ *
+ * A [[WorkerSession]] does ''not'' own the underlying worker or its
+ * transport channel -- those are managed by the [[WorkerDispatcher]].
+ * Multiple sessions may share the same worker when the worker supports
+ * concurrency.
+ *
+ * '''Usage:'''
+ * {{{
+ *   val session = dispatcher.createSession(securityScope = None)
+ *   try {
+ *     session.init(InitMessage(functionPayload, inputSchema, outputSchema))
+ *     val results = session.process(inputBatches)
+ *     results.foreach(handleBatch)
+ *   } finally {
+ *     session.close()
+ *   }
+ * }}}
+ *
+ * '''Lifecycle:'''
+ *  - [[init]] must be called exactly once before [[process]].
+ *  - [[process]] must be called at most once per session.
+ *  - [[close]] must always be called (use try-finally).
+ *  - [[cancel]] may be called at any time to abort execution.
+ *
+ * The lifecycle is enforced here: [[init]] and [[process]] are `final`
+ * and delegate to [[doInit]] / [[doProcess]] after AtomicBoolean guards.
+ * Subclasses implement the protocol-specific work and do not re-check
+ * the contract.
  */
 @Experimental
 abstract class WorkerSession extends AutoCloseable {
-  override def close(): Unit = {}
+
+  private val initialized = new AtomicBoolean(false)
+  private val processed = new AtomicBoolean(false)
+
+  /**
+   * Initializes the UDF execution. Must be called exactly once before
+   * [[process]].
+   *
+   * Throws `IllegalStateException` if called more than once.
+   *
+   * @param message the initialization parameters including the serialized
+   *                function, input/output schemas, and configuration.
+   */
+  final def init(message: InitMessage): Unit = {
+    if (!initialized.compareAndSet(false, true)) {
+      throw new IllegalStateException("init has already been called on this session")
+    }
+    doInit(message)
+  }
+
+  /**
+   * Processes input data through the worker and returns results.
+   *
+   * Follows Spark's Iterator-to-Iterator pattern: input batches are streamed
+   * to the worker, and result batches are lazily pulled from the returned
+   * iterator. The session sends a Finish signal to the worker when the input
+   * iterator is exhausted.
+   *
+   * Must be called after [[init]] and at most once per session.
+   * Throws `IllegalStateException` if called before [[init]] or more than once.
+   *
+   * @param input iterator of raw input data batches (e.g., Arrow IPC)
+   * @return iterator of raw result data batches
+   */
+  final def process(input: Iterator[Array[Byte]]): Iterator[Array[Byte]] = {
+    if (!initialized.get()) {
+      throw new IllegalStateException("process called before init")
+    }
+    if (!processed.compareAndSet(false, true)) {
+      throw new IllegalStateException("process has already been called on this session")
+    }
+    doProcess(input)
+  }
+
+  /** Subclass hook for [[init]]. Called once, after the guard. */
+  protected def doInit(message: InitMessage): Unit
+
+  /** Subclass hook for [[process]]. Called at most once, after the guard. */
+  protected def doProcess(input: Iterator[Array[Byte]]): Iterator[Array[Byte]]
+
+  /**
+   * Requests cancellation of the current UDF execution.
+   *
+   * '''Thread-safety:''' implementations must allow [[cancel]] to be called
+   * from a thread different from the one driving [[process]] (typically a
+   * task interruption thread). It may be invoked at any point after
+   * [[init]] and should be a no-op if execution has already finished.
+   */
+  def cancel(): Unit
+
+  /** Closes this session and releases resources. */
+  override def close(): Unit
 }
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectUnixSocketWorkerDispatcher.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectUnixSocketWorkerDispatcher.scala
new file mode 100644
index 0000000000000..8da0354187e4f
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectUnixSocketWorkerDispatcher.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core.direct
+
+import java.io.File
+import java.nio.file.{Files, Path}
+import java.nio.file.attribute.PosixFilePermissions
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.udf.worker.UDFWorkerSpecification
+import org.apache.spark.udf.worker.core.{UnixSocketWorkerConnection, WorkerLogger}
+import org.apache.spark.udf.worker.core.direct.DirectWorkerDispatcher.SOCKET_POLL_INTERVAL_MS
+
+/**
+ * :: Experimental ::
+ * A [[DirectWorkerDispatcher]] using Unix domain sockets as the worker
+ * transport. Allocates a private 0700 socket directory at construction;
+ * each worker is given a UDS path inside it.
+ *
+ * Concrete subclasses implement [[createConnection]] (with a UDS protocol
+ * of choice) and [[createSessionForWorker]].
+ */
+@Experimental
+abstract class DirectUnixSocketWorkerDispatcher(
+    workerSpec: UDFWorkerSpecification,
+    logger: WorkerLogger = WorkerLogger.NoOp)
+  extends DirectWorkerDispatcher(workerSpec, logger) {
+
+  // Removed explicitly in closeTransport(). deleteOnExit is avoided because
+  // the JDK retains the path for the JVM lifetime, which leaks in
+  // long-lived drivers.
+  private val socketDir: Path = createPrivateTempDirectory()
+
+  override protected def newEndpointAddress(workerId: String): String =
+    socketDir.resolve(s"worker-$workerId.sock").toString
+
+  override protected def waitForReady(
+      address: String,
+      process: Process,
+      outputFile: File): Unit = {
+    val file = new File(address)
+    // At least one poll so very small initTimeouts don't trip a premature
+    // timeout before the worker has any chance to create the socket.
+    val maxAttempts = math.max(1, (initTimeoutMs / SOCKET_POLL_INTERVAL_MS).toInt)
+    var attempts = 0
+    while (!file.exists() && attempts < maxAttempts) {
+      if (!process.isAlive) throwWorkerExitedBeforeSocket(process, address, outputFile)
+      Thread.sleep(SOCKET_POLL_INTERVAL_MS)
+      attempts += 1
+    }
+    if (!file.exists()) {
+      if (process.isAlive) {
+        DirectWorkerDispatcher.destroyForciblyAndReap(
+          process, logger, s"init timeout $address")
+        val tail = readOutputTail(outputFile)
+        throw new DirectWorkerTimeoutException(
+          s"Worker did not create socket at $address within ${initTimeoutMs}ms\n$tail")
+      } else {
+        // Worker exited after the last poll without creating the socket;
+        // prefer the exit-code message over the ambiguous "did not create".
+        throwWorkerExitedBeforeSocket(process, address, outputFile)
+      }
+    }
+  }
+
+  override protected def cleanupEndpointAddress(address: String): Unit = {
+    Files.deleteIfExists(new File(address).toPath)
+  }
+
+  override protected def closeTransport(): Unit = {
+    val dir = socketDir.toFile
+    if (dir.exists()) {
+      val remaining = dir.listFiles()
+      if (remaining != null) remaining.foreach(_.delete())
+      dir.delete()
+    }
+  }
+
+  override protected def validateTransportSupport(): Unit = {
+    val props = workerSpec.getDirect.getProperties
+    require(props.hasConnection,
+      "DirectWorker.properties.connection must be set")
+    val conn = props.getConnection
+    require(conn.hasUnixDomainSocket,
+      "DirectUnixSocketWorkerDispatcher requires UNIX domain socket transport, " +
+        s"got ${conn.getTransportCase}")
+  }
+
+  override protected def createConnection(address: String): UnixSocketWorkerConnection
+
+  private def throwWorkerExitedBeforeSocket(
+      process: Process,
+      address: String,
+      outputFile: File): Nothing = {
+    val tail = readOutputTail(outputFile)
+    throw new DirectWorkerException(
+      s"Worker exited with code ${process.exitValue()} " +
+        s"before creating socket at $address\n$tail")
+  }
+
+  /**
+   * Creates a temp directory with owner-only permissions (0700 on POSIX).
+   * On non-POSIX filesystems falls back to best-effort `File.setXxx`,
+   * which is TOCTOU-racy and weaker; a WARN surfaces if the platform
+   * refuses the setters.
+   */
+  private def createPrivateTempDirectory(): Path = {
+    val attr = PosixFilePermissions.asFileAttribute(
+      PosixFilePermissions.fromString("rwx------"))
+    try {
+      Files.createTempDirectory("spark-udf-worker", attr)
+    } catch {
+      case _: UnsupportedOperationException =>
+        val dir = Files.createTempDirectory("spark-udf-worker")
+        val f = dir.toFile
+        // `&` (non-short-circuiting) so every setter is attempted even if
+        // an earlier one refused.
+        val applied =
+          f.setReadable(false, false) & f.setWritable(false, false) &
+            f.setExecutable(false, false) & f.setReadable(true, true) &
+            f.setWritable(true, true) & f.setExecutable(true, true)
+        if (!applied) {
+          logger.warn(
+            s"Could not fully restrict permissions on $dir; socket " +
+              s"directory may be accessible to other local users on this " +
+              s"filesystem")
+        }
+        dir
+    }
+  }
+}
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerDispatcher.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerDispatcher.scala
new file mode 100644
index 0000000000000..afaf23791d80f
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerDispatcher.scala
@@ -0,0 +1,532 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core.direct
+
+import java.io.{BufferedReader, File, FileInputStream, InputStreamReader}
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path}
+import java.util.UUID
+import java.util.concurrent.{ConcurrentHashMap, TimeUnit}
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.collection.mutable.{Queue => MQueue}
+import scala.jdk.CollectionConverters._
+import scala.util.control.NonFatal
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.udf.worker.{ProcessCallable, UDFWorkerSpecification}
+import org.apache.spark.udf.worker.core.{WorkerConnection, WorkerDispatcher,
+  WorkerLogger, WorkerSecurityScope, WorkerSession}
+import org.apache.spark.udf.worker.core.direct.DirectWorkerDispatcher.{CallableResult,
+  DEFAULT_CALLABLE_TIMEOUT_MS, DEFAULT_GRACEFUL_TIMEOUT_MS, DEFAULT_INIT_TIMEOUT_MS,
+  ENGINE_MAX_TIMEOUT_MS, EnvironmentState, MAX_OUTPUT_SCAN_BYTES,
+  PROCESS_OUTPUT_TAIL_LINES}
+
+/**
+ * :: Experimental ::
+ * A [[WorkerDispatcher]] that creates workers by spawning local OS processes
+ * ("direct" creation mode from the worker specification).
+ *
+ * On the first [[createSession]], the dispatcher ensures the environment is
+ * ready (verify / install) and registers the cleanup hook. Each session
+ * currently gets a fresh worker that is terminated when the session closes
+ * (the single-reference case of the future pooling policy).
+ *
+ * Subclasses implement [[createConnection]] and [[createSessionForWorker]]
+ * to provide protocol-specific behavior (e.g., gRPC, raw sockets).
+ *
+ * For workers obtained through a provisioning service or daemon (indirect
+ * creation), see the `indirect` package (TODO).
+ *
+ * @param workerSpec worker specification (proto)
+ * @param logger [[WorkerLogger]] used for dispatcher-internal messages.
+ *               The framework does not depend on any concrete logging
+ *               backend; callers should pass an adapter that forwards
+ *               to their preferred logger (Spark's `Logging` trait,
+ *               SLF4J, etc.). Defaults to [[WorkerLogger.NoOp]].
+ */
+@Experimental
+abstract class DirectWorkerDispatcher(
+    override val workerSpec: UDFWorkerSpecification,
+    protected val logger: WorkerLogger = WorkerLogger.NoOp)
+  extends WorkerDispatcher {
+
+  // TODO: Connection pooling -- reuse idle workers across sessions.
+  // TODO: Security scope isolation -- partition pool by WorkerSecurityScope.
+
+  validateTransportSupport()
+  validateEnvironmentCallables()
+
+  /**
+   * Maximum time to wait for a setup/verify/cleanup callable to finish.
+   * Subclasses may override this to accommodate slow installation steps
+   * (e.g., a large dependency install). Defaults to 120 seconds.
+   */
+  protected def callableTimeoutMs: Long = DEFAULT_CALLABLE_TIMEOUT_MS
+
+  // Proto-provided timeouts are clamped to ENGINE_MAX_TIMEOUT_MS. The
+  // dispatcher-internal callableTimeoutMs above is subclass-controlled and
+  // not subject to the cap.
+  // Package-private for test access.
+  private[core] val initTimeoutMs: Long = {
+    val props = workerSpec.getDirect.getProperties
+    val raw = if (props.hasInitializationTimeoutMs && props.getInitializationTimeoutMs > 0) {
+      props.getInitializationTimeoutMs.toLong
+    } else {
+      DEFAULT_INIT_TIMEOUT_MS
+    }
+    clampTimeout("initialization_timeout_ms", raw)
+  }
+
+  private val gracefulTimeoutMs: Long = {
+    val props = workerSpec.getDirect.getProperties
+    val raw = if (props.hasGracefulTerminationTimeoutMs &&
+      props.getGracefulTerminationTimeoutMs > 0) {
+      props.getGracefulTerminationTimeoutMs.toLong
+    } else {
+      DEFAULT_GRACEFUL_TIMEOUT_MS
+    }
+    clampTimeout("graceful_termination_timeout_ms", raw)
+  }
+
+  private def clampTimeout(field: String, raw: Long): Long = {
+    if (raw > ENGINE_MAX_TIMEOUT_MS) {
+      logger.warn(
+        s"Worker-provided $field=${raw}ms exceeds engine maximum " +
+          s"${ENGINE_MAX_TIMEOUT_MS}ms; using ${ENGINE_MAX_TIMEOUT_MS}ms instead")
+      ENGINE_MAX_TIMEOUT_MS
+    } else {
+      raw
+    }
+  }
+
+  private[this] val workers = new ConcurrentHashMap[String, DirectWorkerProcess]()
+  private[this] val closed = new AtomicBoolean(false)
+
+  @volatile private var environmentState: EnvironmentState = EnvironmentState.Pending
+  private val environmentLock = new Object
+  private[this] var cleanupHook: Option[Thread] = None
+
+  /**
+   * Allocates a fresh endpoint address for a new worker. The string is
+   * passed to the worker binary as `--connection <address>`.
+   */
+  protected def newEndpointAddress(workerId: String): String
+
+  /**
+   * Waits for the worker process to be ready to accept connections at
+   * `address`. Throws [[DirectWorkerTimeoutException]] on timeout, or
+   * [[DirectWorkerException]] if the process exits early.
+   */
+  protected def waitForReady(
+      address: String,
+      process: Process,
+      outputFile: File): Unit
+
+  /**
+   * Best-effort per-endpoint cleanup, called from the spawn-failure path
+   * before any [[WorkerArtifacts]] / [[WorkerConnection]] exists.
+   */
+  protected def cleanupEndpointAddress(address: String): Unit
+
+  /**
+   * Cleans up dispatcher-level transport state (e.g., a UDS socket
+   * directory). Called from [[close]].
+   */
+  protected def closeTransport(): Unit
+
+  /**
+   * Validates the worker spec's transport choice. Subclasses declare
+   * which transports they support. Called from the base constructor;
+   * implementations must only read base-class state (`workerSpec`).
+   */
+  protected def validateTransportSupport(): Unit
+
+  /** Creates a protocol-specific connection to a worker at the given address. */
+  protected def createConnection(address: String): WorkerConnection
+
+  /** Creates a protocol-specific session for the given worker. */
+  protected def createSessionForWorker(worker: DirectWorkerProcess): WorkerSession
+
+  override def createSession(
+      securityScope: Option[WorkerSecurityScope]): WorkerSession = {
+    require(securityScope.isEmpty,
+      "securityScope is not supported yet; pass None until pooling lands")
+    if (closed.get()) throwClosed()
+    ensureEnvironmentReady()
+    val worker = spawnWorker()
+    // Acquire before publish: a concurrent close() iterating `workers` must
+    // not tear down this worker before we hand it to the caller.
+    worker.acquireSession()
+    workers.put(worker.id, worker)
+    // Re-check for close() that ran concurrently. Releasing fires the
+    // ref-count callback, which removes and tears down the worker.
+    if (closed.get()) {
+      worker.releaseSession()
+      throwClosed()
+    }
+    try {
+      createSessionForWorker(worker)
+    } catch {
+      case e: InterruptedException =>
+        Thread.currentThread().interrupt()
+        worker.releaseSession()
+        throw e
+      case NonFatal(e) =>
+        worker.releaseSession()
+        throw e
+    }
+  }
+
+  /**
+   * Invoked when a worker's last session closes. Terminates the worker
+   * today; future pooling can reuse it here instead. Safe to call after
+   * dispatcher close -- the worker's own CAS-idempotent close makes a
+   * second teardown a no-op.
+   */
+  private def releaseWorker(worker: DirectWorkerProcess): Unit = {
+    workers.remove(worker.id)
+    try {
+      worker.close()
+    } catch {
+      case NonFatal(e) =>
+        logger.warn(s"Error closing worker ${worker.id}", e)
+    }
+  }
+
+  private def throwClosed(): Nothing =
+    throw new IllegalStateException("Dispatcher is closed")
+
+  /**
+   * Terminates tracked workers, removes the socket directory, and runs
+   * environment cleanup. Idempotent via CAS. Does not drain in-flight
+   * createSession calls -- a worker spawned racing with close tears
+   * itself down through the ref-count callback, which may outlive this
+   * method.
+   */
+  override def close(): Unit = {
+    if (!closed.compareAndSet(false, true)) {
+      return
+    }
+    // TODO: close workers in parallel -- today shutdown is serialised at
+    //   N * gracefulTimeoutMs worst case.
+    workers.values().iterator().asScala.foreach { w =>
+      try {
+        w.close()
+      } catch {
+        case NonFatal(e) =>
+          logger.warn(s"Error closing worker ${w.id}", e)
+      }
+    }
+    workers.clear()
+    try closeTransport() catch {
+      case NonFatal(e) =>
+        logger.warn("Error cleaning up transport state", e)
+    }
+    deregisterEnvironmentCleanupHook()
+    runEnvironmentCleanup()
+  }
+
+  // -- Environment lifecycle -------------------------------------------------
+
+  // TODO: distinguish retriable vs permanent environment failures.
+  private def ensureEnvironmentReady(): Unit = {
+    environmentLock.synchronized {
+      environmentState match {
+        case EnvironmentState.Ready | EnvironmentState.CleanedUp =>
+        case EnvironmentState.Failed(msg) =>
+          throw new DirectWorkerException(s"Environment setup previously failed: $msg")
+        case EnvironmentState.Pending =>
+          val env = workerSpec.getEnvironment
+          // Register up front so a partially-successful install still gets
+          // torn down at JVM shutdown if dispatcher.close is never called.
+          // No-op when environment_cleanup is not configured.
+          registerEnvironmentCleanupHook()
+          val verified = env.hasEnvironmentVerification &&
+            runCallable(env.getEnvironmentVerification).exitCode == 0
+          if (!verified && env.hasInstallation) {
+            // Treat any install failure (timeout or non-zero exit) as
+            // permanent. A partially-completed install can leave files on
+            // disk that a retry would race with; retry policy belongs in
+            // the future predicate (see TODO above).
+            val result = try {
+              runCallable(env.getInstallation)
+            } catch {
+              case e: DirectWorkerException =>
+                environmentState = EnvironmentState.Failed(
+                  s"installation failed: ${e.getMessage}")
+                throw e
+            }
+            if (result.exitCode != 0) {
+              val detail = s"exit code ${result.exitCode}\n${result.outputTail}"
+              environmentState = EnvironmentState.Failed(detail)
+              throw new DirectWorkerException(
+                s"Environment installation failed with $detail")
+            }
+          }
+          environmentState = EnvironmentState.Ready
+      }
+    }
+  }
+
+  // TODO: share one JVM shutdown hook across all dispatchers in the
+  //   process. Each live dispatcher is retained by the JVM until shutdown.
+
+  /** Registers the JVM shutdown hook that runs the cleanup callable. */
+  private def registerEnvironmentCleanupHook(): Unit = {
+    if (!Thread.holdsLock(environmentLock)) {
+      throw new IllegalStateException(
+        "registerEnvironmentCleanupHook must be called while holding environmentLock")
+    }
+    if (cleanupHook.isDefined) return
+    if (workerSpec.getEnvironment.hasEnvironmentCleanup) {
+      val hook = new Thread(() => runEnvironmentCleanup(), "udf-env-cleanup")
+      cleanupHook = Some(hook)
+      // scalastyle:off runtimeaddshutdownhook
+      Runtime.getRuntime.addShutdownHook(hook)
+      // scalastyle:on runtimeaddshutdownhook
+    }
+  }
+
+  private def deregisterEnvironmentCleanupHook(): Unit = {
+    environmentLock.synchronized {
+      cleanupHook.foreach { hook =>
+        try {
+          Runtime.getRuntime.removeShutdownHook(hook)
+        } catch {
+          case _: IllegalStateException => // JVM already shutting down
+        }
+        cleanupHook = None
+      }
+    }
+  }
+
+  private def runEnvironmentCleanup(): Unit = {
+    environmentLock.synchronized {
+      environmentState match {
+        case EnvironmentState.CleanedUp =>
+        case _ =>
+          if (workerSpec.getEnvironment.hasEnvironmentCleanup) {
+            try {
+              val result = runCallable(workerSpec.getEnvironment.getEnvironmentCleanup)
+              if (result.exitCode != 0) {
+                logger.warn(s"Environment cleanup exited with code ${result.exitCode}" +
+                  s"\n${result.outputTail}")
+              }
+            } catch {
+              case NonFatal(e) => logger.warn("Environment cleanup failed", e)
+            }
+          }
+          environmentState = EnvironmentState.CleanedUp
+      }
+    }
+  }
+
+  // -- Process helpers -------------------------------------------------------
+
+  /**
+   * Runs a [[ProcessCallable]] synchronously and returns the result.
+   * Always throws on timeout; callers check `exitCode` for non-timeout failures.
+   */
+  private[core] def runCallable(callable: ProcessCallable): CallableResult = {
+    val cmd = (callable.getCommandList.asScala ++ callable.getArgumentsList.asScala).toSeq
+    require(cmd.nonEmpty,
+      "ProcessCallable must have at least one entry in command or arguments")
+    val outputFile = Files.createTempFile("udf-callable-", ".log")
+    try {
+      val process = launchProcess(
+        cmd, callable.getEnvironmentVariablesMap.asScala.toMap, outputFile.toFile)
+      val timeoutMs = callableTimeoutMs
+      if (!process.waitFor(timeoutMs, TimeUnit.MILLISECONDS)) {
+        DirectWorkerDispatcher.destroyForciblyAndReap(
+          process, logger, s"callable timeout: ${cmd.head}")
+        val tail = readOutputTail(outputFile.toFile)
+        throw new DirectWorkerTimeoutException(
+          s"Callable timed out after ${timeoutMs}ms: " +
+            s"${cmd.mkString(" ")}\n$tail")
+      }
+      val tail = readOutputTail(outputFile.toFile)
+      CallableResult(process.exitValue(), tail)
+    } finally {
+      Files.deleteIfExists(outputFile)
+    }
+  }
+
+  private def spawnWorker(): DirectWorkerProcess = {
+    val runner = workerSpec.getDirect.getRunner
+    val baseCmd = (runner.getCommandList.asScala ++ runner.getArgumentsList.asScala).toSeq
+    require(baseCmd.nonEmpty,
+      "DirectWorker.runner must have at least one entry in command or arguments")
+    val workerId = UUID.randomUUID().toString
+    val address = newEndpointAddress(workerId)
+    // Proto contract: the engine must pass --id and --connection.
+    val cmd = baseCmd ++ Seq("--id", workerId, "--connection", address)
+    val env = runner.getEnvironmentVariablesMap.asScala.toMap
+    val outputFile = Files.createTempFile("udf-worker-", ".log")
+    val process = launchProcess(cmd, env, outputFile.toFile)
+
+    try {
+      waitForReady(address, process, outputFile.toFile)
+      val connection = createConnection(address)
+      val artifacts = new WorkerArtifacts(process, connection, outputFile, logger)
+      new DirectWorkerProcess(
+        workerId, artifacts, gracefulTimeoutMs, logger,
+        onLastSessionReleased = releaseWorker)
+    } catch {
+      case e: InterruptedException =>
+        Thread.currentThread().interrupt()
+        cleanupRawSpawn(process, address, outputFile)
+        throw e
+      case NonFatal(e) =>
+        cleanupRawSpawn(process, address, outputFile)
+        throw e
+    }
+  }
+
+  // Pre-WorkerArtifacts cleanup: the connection has not been built yet,
+  // so we have no bundle to close(). Each step is independent.
+  private def cleanupRawSpawn(p: Process, address: String, outputFile: Path): Unit = {
+    DirectWorkerDispatcher.destroyForciblyAndReap(p, logger, "failed spawn")
+    try cleanupEndpointAddress(address) catch {
+      case NonFatal(e) =>
+        logger.debug(s"Failed to clean up endpoint address $address", e)
+    }
+    try Files.deleteIfExists(outputFile) catch {
+      case NonFatal(e) =>
+        logger.debug(s"Failed to clean up worker output file $outputFile", e)
+    }
+  }
+
+  /**
+   * Starts an OS process. stdout and stderr are merged and redirected to the
+   * given file so that output can be read back for error reporting.
+   */
+  private def launchProcess(
+      command: Seq[String],
+      env: Map[String, String],
+      outputFile: File): Process = {
+    val builder = new ProcessBuilder(command: _*)
+    env.foreach { case (k, v) => builder.environment().put(k, v) }
+    builder.redirectErrorStream(true)
+    builder.redirectOutput(outputFile)
+    builder.start()
+  }
+
+  // Bounded scan so a runaway worker that writes gigabytes of output does
+  // not OOM the caller during error reporting.
+  protected def readOutputTail(file: File): String = {
+    if (!file.exists() || file.length() == 0) return ""
+    val fileLen = file.length()
+    val startPos = math.max(0L, fileLen - MAX_OUTPUT_SCAN_BYTES)
+    val fis = new FileInputStream(file)
+    try {
+      if (startPos > 0) fis.getChannel.position(startPos)
+      val reader = new BufferedReader(
+        new InputStreamReader(fis, StandardCharsets.UTF_8))
+      // Discard the first (partial) line when we seeked into the middle.
+      if (startPos > 0) reader.readLine()
+      val buffer = new MQueue[String]()
+      var line = reader.readLine()
+      while (line != null) {
+        if (buffer.size >= PROCESS_OUTPUT_TAIL_LINES) buffer.dequeue()
+        buffer.enqueue(line)
+        line = reader.readLine()
+      }
+      if (buffer.isEmpty) ""
+      else "Process output (last lines):\n" + buffer.mkString("\n")
+    } catch {
+      case NonFatal(e) =>
+        logger.debug(s"Failed to read process output from $file", e)
+        ""
+    } finally {
+      fis.close()
+    }
+  }
+
+  // -- Spec validation -------------------------------------------------------
+
+  // Verification exists to short-circuit installation when the environment
+  // is already prepared, so requiring installation alongside verification
+  // catches user errors at spec-validation time.
+  private def validateEnvironmentCallables(): Unit = {
+    val env = workerSpec.getEnvironment
+    require(!env.hasEnvironmentVerification || env.hasInstallation,
+      "WorkerEnvironment.environment_verification requires installation to be set")
+  }
+}
+
+private[direct] object DirectWorkerDispatcher {
+  private[direct] val SOCKET_POLL_INTERVAL_MS = 100L
+  private[direct] val DEFAULT_INIT_TIMEOUT_MS = 10000L
+  private[direct] val DEFAULT_CALLABLE_TIMEOUT_MS = 120000L
+  private[direct] val DEFAULT_GRACEFUL_TIMEOUT_MS = 5000L
+  // Engine-side cap on proto-provided worker timeouts. The defaults below
+  // must stay at or under this cap so the clamp only fires on
+  // user-provided values.
+  private[direct] val ENGINE_MAX_TIMEOUT_MS = 30000L
+  require(DEFAULT_INIT_TIMEOUT_MS <= ENGINE_MAX_TIMEOUT_MS &&
+    DEFAULT_GRACEFUL_TIMEOUT_MS <= ENGINE_MAX_TIMEOUT_MS,
+    "default timeouts must not exceed ENGINE_MAX_TIMEOUT_MS")
+  private[direct] val PROCESS_OUTPUT_TAIL_LINES = 50
+  private[direct] val MAX_OUTPUT_SCAN_BYTES = 1024L * 1024L // 1 MiB
+  // 5s bounds the wait for the kernel to reap a SIGKILL'd child. SIGKILL
+  // is unblockable, so exceeding this usually means the process is stuck
+  // in uninterruptible I/O (D-state) and further waiting will not help.
+  private[direct] val SIGKILL_REAP_TIMEOUT_MS = 5000L
+
+  /**
+   * SIGKILL `process` and wait up to [[SIGKILL_REAP_TIMEOUT_MS]] for the
+   * kernel to reap it. `destroyForcibly()` alone returns before the child
+   * is reaped, which leaks a zombie until JVM exit. On reap-timeout logs
+   * a warning; on interrupt re-raises the interrupt and returns.
+   *
+   * @param context short tag included in the timeout warning so operators
+   *                can correlate a stuck child with its source.
+   */
+  private[direct] def destroyForciblyAndReap(
+      process: Process,
+      logger: WorkerLogger,
+      context: String = ""): Unit = {
+    if (!process.isAlive) return
+    process.destroyForcibly()
+    val reaped = try {
+      process.waitFor(SIGKILL_REAP_TIMEOUT_MS, TimeUnit.MILLISECONDS)
+    } catch {
+      case _: InterruptedException =>
+        Thread.currentThread().interrupt()
+        return
+    }
+    if (!reaped && process.isAlive) {
+      val suffix = if (context.nonEmpty) s" [$context]" else ""
+      logger.warn(
+        s"Process ${process.pid()}$suffix still alive ${SIGKILL_REAP_TIMEOUT_MS}ms " +
+          s"after SIGKILL; leaving behind as zombie " +
+          s"(likely stuck in uninterruptible kernel state)")
+    }
+  }
+
+  /** Result of running a [[ProcessCallable]]. */
+  private[core] case class CallableResult(exitCode: Int, outputTail: String)
+
+  private[direct] sealed trait EnvironmentState
+  private[direct] object EnvironmentState {
+    case object Pending extends EnvironmentState
+    case object Ready extends EnvironmentState
+    case class Failed(detail: String) extends EnvironmentState
+    case object CleanedUp extends EnvironmentState
+  }
+}
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerException.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerException.scala
new file mode 100644
index 0000000000000..b0ece15eae38f
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerException.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core.direct
+
+import org.apache.spark.annotation.Experimental
+
+/**
+ * :: Experimental ::
+ * Thrown by [[DirectWorkerDispatcher]] for runtime failures: worker
+ * spawn problems, environment setup or cleanup failures, callable
+ * timeouts, and socket-establishment timeouts.
+ *
+ * Distinguished from `IllegalArgumentException` (bad spec) and
+ * `IllegalStateException` (using a closed dispatcher), which indicate
+ * programming errors. Catching this type lets callers handle runtime
+ * failures specifically without catching every `RuntimeException`.
+ */
+@Experimental
+class DirectWorkerException(message: String, cause: Throwable = null)
+  extends RuntimeException(message, cause)
+
+/**
+ * :: Experimental ::
+ * A [[DirectWorkerException]] caused specifically by a timeout: a worker
+ * that did not bind its socket within `initialization_timeout_ms`, or a
+ * setup callable (verify / install / cleanup) that exceeded
+ * `callableTimeoutMs`. Exposed as a distinct type so callers can choose
+ * different retry / escalation paths for timeouts vs other failures.
+ */
+@Experimental
+class DirectWorkerTimeoutException(message: String, cause: Throwable = null)
+  extends DirectWorkerException(message, cause)
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerProcess.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerProcess.scala
new file mode 100644
index 0000000000000..f4b5c1df63193
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerProcess.scala
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core.direct
+
+import java.nio.file.{Files, Path}
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}
+
+import scala.util.control.NonFatal
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.udf.worker.core.{WorkerConnection, WorkerLogger}
+
+/**
+ * :: Experimental ::
+ * A locally-spawned OS process running a UDF worker, together with its
+ * transport connection. Wraps a [[WorkerArtifacts]] bundle (process +
+ * connection + output log) plus a session ref-count scaffolding for
+ * future pooling -- today one process per session.
+ *
+ * Closing sends SIGTERM, waits up to [[gracefulTimeoutMs]], then
+ * delegates connection close + forced kill + file cleanup to
+ * [[WorkerArtifacts.close]].
+ *
+ * @param id stable worker identifier (UUID passed to the binary as `--id`).
+ * @param artifacts process + connection + output-log, disposed together.
+ * @param gracefulTimeoutMs wait after SIGTERM before escalating to SIGKILL.
+ * @param logger [[WorkerLogger]] for process-level messages.
+ * @param onLastSessionReleased fires when the ref-count hits 0. Runs on
+ *     the thread calling [[releaseSession]]. May fire more than once
+ *     across a worker's lifetime; a concurrent `acquireSession` can
+ *     re-increment the count before the callback returns, so pooling
+ *     dispatchers must arbitrate reuse themselves.
+ */
+@Experimental
+class DirectWorkerProcess(
+    val id: String,
+    private[direct] val artifacts: WorkerArtifacts,
+    val gracefulTimeoutMs: Long,
+    protected val logger: WorkerLogger = WorkerLogger.NoOp,
+    private[direct] val onLastSessionReleased: DirectWorkerProcess => Unit = _ => ())
+  extends AutoCloseable {
+
+  // TODO: idle-timeout tracking and concurrent session capacity.
+
+  private val activeSessionCount = new AtomicInteger(0)
+  private val closed = new AtomicBoolean(false)
+
+  /** The OS process handle for this worker. */
+  def process: Process = artifacts.process
+
+  /** The transport connection for this worker. */
+  def connection: WorkerConnection = artifacts.connection
+
+  /** Path to the merged stdout/stderr log for this worker. */
+  def outputFile: Path = artifacts.outputFile
+
+  /** Number of sessions currently using this worker. */
+  def activeSessions: Int = activeSessionCount.get()
+
+  /** Increments the active session count. */
+  def acquireSession(): Unit = activeSessionCount.incrementAndGet()
+
+  /**
+   * Decrements the active session count. Fires [[onLastSessionReleased]]
+   * on the 0-transition. A negative count indicates an unbalanced
+   * acquire/release; we log and reset to 0 rather than silently mask it.
+   */
+  def releaseSession(): Unit = {
+    val c = activeSessionCount.decrementAndGet()
+    if (c < 0) {
+      logger.warn(
+        s"releaseSession called without a matching acquireSession (count=$c)")
+      activeSessionCount.set(0)
+    } else if (c == 0) {
+      // Swallow callback errors so session.close cannot throw.
+      try onLastSessionReleased(this) catch {
+        case NonFatal(e) =>
+          logger.warn(s"onLastSessionReleased callback failed for worker $id", e)
+      }
+    }
+  }
+
+  /** Returns true if the OS process is running and the connection is usable. */
+  def isAlive: Boolean = process.isAlive && connection.isActive
+
+  /**
+   * Sends SIGTERM, waits up to [[gracefulTimeoutMs]] for the worker to
+   * exit, then disposes artifacts (connection close + SIGKILL + file
+   * cleanup). Idempotent via CAS.
+   */
+  override def close(): Unit = {
+    if (!closed.compareAndSet(false, true)) return
+
+    if (process.isAlive) {
+      process.destroy() // SIGTERM
+      try {
+        // Ignore the return value: artifacts.close() SIGKILLs if still
+        // alive and no-ops if already dead.
+        process.waitFor(gracefulTimeoutMs, TimeUnit.MILLISECONDS)
+      } catch {
+        case _: InterruptedException =>
+          Thread.currentThread().interrupt()
+      }
+    }
+
+    artifacts.close()
+  }
+}
+
+/**
+ * Closeable bundle of per-worker OS resources: the child [[Process]], its
+ * transport [[WorkerConnection]], and its merged stdout/stderr log.
+ * [[close]] runs connection close (which for UDS removes the socket
+ * file), then SIGKILL-reaps the process, then deletes the output log.
+ * Graceful SIGTERM is the higher layer's responsibility (see
+ * [[DirectWorkerProcess#close]]).
+ */
+private[direct] final class WorkerArtifacts(
+    val process: Process,
+    val connection: WorkerConnection,
+    val outputFile: Path,
+    private[this] val logger: WorkerLogger) extends AutoCloseable {
+
+  private[this] val closed = new AtomicBoolean(false)
+
+  /**
+   * Idempotently closes the connection (transport teardown + any
+   * transport-specific cleanup such as deleting a UDS socket file),
+   * SIGKILL-reaps the process, and deletes the output log. Each step
+   * is guarded so a failure in one does not skip the next.
+   */
+  override def close(): Unit = {
+    if (!closed.compareAndSet(false, true)) return
+
+    try connection.close() catch {
+      case NonFatal(e) =>
+        logger.warn("Error closing worker connection", e)
+    }
+
+    DirectWorkerDispatcher.destroyForciblyAndReap(process, logger, "worker artifacts")
+
+    try Files.deleteIfExists(outputFile) catch {
+      case NonFatal(e) =>
+        logger.warn(s"Error cleaning up worker output file $outputFile", e)
+    }
+  }
+}
diff --git a/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerSession.scala b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerSession.scala
new file mode 100644
index 0000000000000..7cdc5329350e3
--- /dev/null
+++ b/udf/worker/core/src/main/scala/org/apache/spark/udf/worker/core/direct/DirectWorkerSession.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core.direct
+
+import java.util.concurrent.atomic.AtomicBoolean
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.udf.worker.core.{WorkerConnection, WorkerSession}
+
+/**
+ * :: Experimental ::
+ * A [[WorkerSession]] backed by a locally-spawned [[DirectWorkerProcess]].
+ *
+ * This is the session type returned by [[DirectWorkerDispatcher]]. It ties
+ * the session lifecycle to the worker's ref-count: the dispatcher increments
+ * the count before construction, and [[close]] decrements it, so the
+ * dispatcher knows when a worker process is idle and can be terminated or
+ * reused.
+ *
+ * Subclasses implement the protocol-specific data transmission
+ * ([[init]], [[process]], [[cancel]]).
+ *
+ * @param workerProcess the direct worker process backing this session.
+ *                      Internal to the `core` package and test code -- the
+ *                      worker handle is a dispatcher implementation detail,
+ *                      not part of the public WorkerSession API.
+ */
+@Experimental
+abstract class DirectWorkerSession(
+    private[core] val workerProcess: DirectWorkerProcess) extends WorkerSession {
+
+  private val released = new AtomicBoolean(false)
+
+  /** The connection to the worker for this session. */
+  def connection: WorkerConnection = workerProcess.connection
+
+  override def close(): Unit = {
+    if (released.compareAndSet(false, true)) {
+      workerProcess.releaseSession()
+    }
+  }
+}
diff --git a/udf/worker/core/src/test/scala/org/apache/spark/udf/worker/core/DirectWorkerDispatcherSuite.scala b/udf/worker/core/src/test/scala/org/apache/spark/udf/worker/core/DirectWorkerDispatcherSuite.scala
new file mode 100644
index 0000000000000..60f5e2211b702
--- /dev/null
+++ b/udf/worker/core/src/test/scala/org/apache/spark/udf/worker/core/DirectWorkerDispatcherSuite.scala
@@ -0,0 +1,981 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.udf.worker.core
+
+import java.io.File
+import java.nio.file.{Files, Path}
+import java.nio.file.attribute.PosixFileAttributeView
+
+import scala.jdk.CollectionConverters._
+
+// scalastyle:off funsuite
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.funsuite.AnyFunSuite
+
+import org.apache.spark.udf.worker.{
+  DirectWorker, LocalTcpConnection, ProcessCallable, UDFWorkerProperties,
+  UDFWorkerSpecification, UnixDomainSocket, WorkerConnectionSpec,
+  WorkerEnvironment}
+import org.apache.spark.udf.worker.core.direct.{DirectUnixSocketWorkerDispatcher,
+  DirectWorkerException, DirectWorkerProcess, DirectWorkerSession,
+  DirectWorkerTimeoutException}
+
+/**
+ * A [[WorkerConnection]] test implementation that considers the connection
+ * active as long as the socket file exists on disk. Inherits socket-file
+ * deletion from [[UnixSocketWorkerConnection.close]].
+ */
+class SocketFileConnection(socketPath: String)
+    extends UnixSocketWorkerConnection(socketPath) {
+  override def isActive: Boolean = new File(socketPath).exists()
+}
+
+/**
+ * A stub [[DirectWorkerSession]] for process-lifecycle tests that don't
+ * need actual data transmission.
+ *
+ * TODO: [[cancel]] is a no-op here. Once a concrete [[DirectWorkerSession]]
+ *   with real data-plane wiring lands, add tests exercising cancel() in
+ *   particular: cancel from a different thread than process(), cancel
+ *   after process() has returned, and cancel before init (should be a
+ *   no-op). Tracking the thread-safety contract in the docstring on
+ *   [[org.apache.spark.udf.worker.core.WorkerSession.cancel]].
+ */
+class StubWorkerSession(
+    workerProcess: DirectWorkerProcess) extends DirectWorkerSession(workerProcess) {
+
+  override protected def doInit(message: InitMessage): Unit = {}
+
+  override protected def doProcess(
+      input: Iterator[Array[Byte]]): Iterator[Array[Byte]] =
+    Iterator.empty
+
+  override def cancel(): Unit = {}
+}
+
+/**
+ * A [[DirectUnixSocketWorkerDispatcher]] subclass for testing that uses
+ * a socket-file connection and stub sessions instead of a real protocol
+ * implementation.
+ */
+class TestDirectWorkerDispatcher(spec: UDFWorkerSpecification)
+    extends DirectUnixSocketWorkerDispatcher(spec) {
+
+  override protected def createConnection(
+      socketPath: String): UnixSocketWorkerConnection =
+    new SocketFileConnection(socketPath)
+
+  override protected def createSessionForWorker(
+      worker: DirectWorkerProcess): WorkerSession =
+    new StubWorkerSession(worker)
+}
+
+/**
+ * Tests for [[DirectWorkerDispatcher]] process lifecycle: spawning workers
+ * and terminating them on close.
+ */
+class DirectWorkerDispatcherSuite
+    extends AnyFunSuite with BeforeAndAfterEach {
+// scalastyle:on funsuite
+
+  private val echoWorkerScript =
+    """
+      |#!/bin/bash
+      |SOCKET_PATH=""
+      |while [[ $# -gt 0 ]]; do
+      |  case "$1" in
+      |    --connection) SOCKET_PATH="$2"; shift 2 ;;
+      |    *) shift ;;
+      |  esac
+      |done
+      |cleanup() { rm -f "$SOCKET_PATH"; exit 0; }
+      |trap cleanup SIGTERM
+      |touch "$SOCKET_PATH"
+      |while true; do sleep 1; done
+    """.stripMargin.trim
+
+  private def defaultRunner: ProcessCallable = ProcessCallable.newBuilder()
+    .addCommand("bash").addCommand("-c").addCommand(echoWorkerScript).addCommand("--")
+    .build()
+
+  private def udsProperties: UDFWorkerProperties = UDFWorkerProperties.newBuilder()
+    .setConnection(WorkerConnectionSpec.newBuilder()
+      .setUnixDomainSocket(UnixDomainSocket.getDefaultInstance)
+      .build())
+    .build()
+
+  private def directWorker(runner: ProcessCallable): DirectWorker =
+    DirectWorker.newBuilder().setRunner(runner).setProperties(udsProperties).build()
+
+  private def specWithRunner(runner: ProcessCallable): UDFWorkerSpecification =
+    UDFWorkerSpecification.newBuilder()
+      .setDirect(directWorker(runner))
+      .build()
+
+  private def specWithEnv(
+      runner: ProcessCallable = defaultRunner,
+      env: WorkerEnvironment): UDFWorkerSpecification =
+    UDFWorkerSpecification.newBuilder()
+      .setEnvironment(env)
+      .setDirect(directWorker(runner))
+      .build()
+
+  private var dispatcher: TestDirectWorkerDispatcher = _
+
+  override def afterEach(): Unit = {
+    if (dispatcher != null) {
+      dispatcher.close()
+      dispatcher = null
+    }
+    super.afterEach()
+  }
+
+  // Narrow the publicly-typed WorkerSession returned by `createSession` back
+  // down to StubWorkerSession in one place, with a descriptive failure if
+  // the cast is ever wrong, so individual tests don't scatter `asInstanceOf`
+  // (which would throw ClassCastException rather than a useful message).
+  private def createStubSession(): StubWorkerSession =
+    dispatcher.createSession(None) match {
+      case stub: StubWorkerSession => stub
+      case other => fail(
+        s"Expected StubWorkerSession, got ${other.getClass.getSimpleName}")
+    }
+
+  // The whole suite uses UDS as the only transport, so reaching past the
+  // generic WorkerConnection abstraction to read the socket path is fine.
+  private def udsPath(w: DirectWorkerProcess): String = w.connection match {
+    case uds: UnixSocketWorkerConnection => uds.socketPath
+    case other => fail(
+      s"Expected UnixSocketWorkerConnection, got ${other.getClass.getSimpleName}")
+  }
+
+  test("creates a worker and session") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+
+    val session = createStubSession()
+    val worker = session.workerProcess
+
+    assert(worker.isAlive, "worker should be alive after creation")
+    assert(worker.activeSessions == 1, "should have 1 active session")
+    assert(new File(udsPath(worker)).exists(), "socket file should exist")
+
+    session.close()
+    assert(worker.activeSessions == 0, "should have 0 sessions after close")
+  }
+
+  test("concurrent createSession calls produce distinct workers") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+
+    val threads = 8
+    val sessions = new java.util.concurrent.ConcurrentLinkedQueue[StubWorkerSession]()
+    val startGate = new java.util.concurrent.CountDownLatch(1)
+    val doneGate = new java.util.concurrent.CountDownLatch(threads)
+    val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]()
+
+    (1 to threads).foreach { _ =>
+      new Thread(() => {
+        try {
+          startGate.await()
+          sessions.add(createStubSession())
+        } catch {
+          case t: Throwable => errors.add(t)
+        } finally {
+          doneGate.countDown()
+        }
+      }).start()
+    }
+    startGate.countDown()
+    assert(doneGate.await(30, java.util.concurrent.TimeUnit.SECONDS),
+      "createSession threads did not finish in time")
+
+    assert(errors.isEmpty,
+      s"unexpected errors during concurrent createSession: ${errors.toArray.mkString(", ")}")
+    assert(sessions.size == threads, "expected one session per thread")
+
+    val sessionList = sessions.asScala.toList
+    val workerObjects = sessionList.map(_.workerProcess)
+    assert(workerObjects.distinct.length == threads,
+      "each session should have its own DirectWorkerProcess")
+    // Object-identity is not sufficient on its own: a future regression
+    // that accidentally shared underlying transport resources could still
+    // hand out distinct DirectWorkerProcess wrappers pointing at the same
+    // socket. Verify socket paths are unique too.
+    val socketPaths = workerObjects.map(udsPath)
+    assert(socketPaths.distinct.length == threads,
+      s"each worker should have its own socket path, got $socketPaths")
+
+    sessionList.foreach(_.close())
+  }
+
+  test("close shuts down all workers via SIGTERM") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+
+    val session1 = createStubSession()
+    val session2 = createStubSession()
+
+    val worker1 = session1.workerProcess
+    val worker2 = session2.workerProcess
+
+    session1.close()
+    session2.close()
+    dispatcher.close()
+    dispatcher = null
+
+    assert(!worker1.process.isAlive, "worker1 should be terminated")
+    assert(!worker2.process.isAlive, "worker2 should be terminated")
+  }
+
+  test("close escalates to SIGKILL when worker ignores SIGTERM") {
+    // The worker traps SIGTERM so the graceful stop is ineffective; the
+    // dispatcher must escalate to SIGKILL via destroyForciblyAndReap.
+    // Using a short gracefulTimeoutMs (500ms) keeps the test bounded:
+    // max close time is gracefulTimeoutMs + SIGKILL_REAP_TIMEOUT_MS.
+    val sigtermIgnoringScript =
+      """
+        |#!/bin/bash
+        |SOCKET_PATH=""
+        |while [[ $# -gt 0 ]]; do
+        |  case "$1" in
+        |    --connection) SOCKET_PATH="$2"; shift 2 ;;
+        |    *) shift ;;
+        |  esac
+        |done
+        |touch "$SOCKET_PATH"
+        |trap '' SIGTERM
+        |while true; do sleep 1; done
+      """.stripMargin.trim
+    val runner = ProcessCallable.newBuilder()
+      .addCommand("bash").addCommand("-c").addCommand(sigtermIgnoringScript).addCommand("--")
+      .build()
+    val shortGracefulProps = UDFWorkerProperties.newBuilder()
+      .setConnection(WorkerConnectionSpec.newBuilder()
+        .setUnixDomainSocket(UnixDomainSocket.getDefaultInstance).build())
+      .setGracefulTerminationTimeoutMs(500)
+      .build()
+    val spec = UDFWorkerSpecification.newBuilder()
+      .setDirect(DirectWorker.newBuilder()
+        .setRunner(runner).setProperties(shortGracefulProps).build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(spec)
+
+    val session = createStubSession()
+    val worker = session.workerProcess
+    assert(worker.process.isAlive, "worker should be alive before close")
+
+    val closeStart = System.nanoTime()
+    session.close()
+    val closeElapsedMs = (System.nanoTime() - closeStart) / 1000000L
+
+    assert(!worker.process.isAlive,
+      s"worker should have been SIGKILLed after ignoring SIGTERM (took ${closeElapsedMs}ms)")
+    assert(closeElapsedMs >= 500L,
+      s"close should have waited for gracefulTimeoutMs before escalating, " +
+        s"took ${closeElapsedMs}ms")
+  }
+
+  test("closing a session terminates its worker") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+
+    val session = createStubSession()
+    val worker = session.workerProcess
+    val socketFile = new File(udsPath(worker))
+
+    assert(worker.process.isAlive, "worker should be alive before session close")
+    assert(socketFile.exists(), "socket file should exist before session close")
+
+    session.close()
+
+    // The session-close path is synchronous: SIGTERM is sent and the process
+    // is reaped before `close` returns.
+    assert(!worker.process.isAlive,
+      "worker process should be terminated when the session closes")
+    assert(!socketFile.exists(),
+      "socket file should be cleaned up when the session closes")
+  }
+
+  test("concurrent session.close and dispatcher.close do not double-close the worker") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+
+    val sessions = (1 to 4).map(_ => createStubSession())
+    val workers = sessions.map(_.workerProcess)
+
+    val barrier = new java.util.concurrent.CyclicBarrier(sessions.size + 1)
+    val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]()
+
+    val sessionThreads = sessions.map { s =>
+      val t = new Thread(() => {
+        try {
+          barrier.await()
+          s.close()
+        } catch {
+          case t: Throwable => errors.add(t)
+        }
+      })
+      t.start()
+      t
+    }
+
+    val dispatcherThread = new Thread(() => {
+      try {
+        barrier.await()
+        dispatcher.close()
+      } catch {
+        case t: Throwable => errors.add(t)
+      }
+    })
+    dispatcherThread.start()
+
+    sessionThreads.foreach(_.join(30000))
+    dispatcherThread.join(30000)
+    dispatcher = null
+
+    assert(errors.isEmpty,
+      s"unexpected errors during concurrent close: ${errors.toArray.mkString(", ")}")
+    workers.foreach { w =>
+      assert(!w.process.isAlive,
+        s"worker at ${udsPath(w)} should be terminated after concurrent close")
+    }
+  }
+
+  test("close racing with in-flight createSession does not leak the worker") {
+    // The acquire-before-publish + post-publish closed re-check pattern in
+    // createSession is designed for this race: thread A is mid-spawn when
+    // thread B calls close(). Thread A must either throw IllegalStateException
+    // (post-publish check caught the close) or receive a session whose worker
+    // is reaped by close()'s iteration. No orphan process or socket file
+    // should remain in either case.
+    val readyLatch = new java.util.concurrent.CountDownLatch(1)
+    val releaseLatch = new java.util.concurrent.CountDownLatch(1)
+    val capturedWorkers =
+      new java.util.concurrent.ConcurrentLinkedQueue[DirectWorkerProcess]()
+    val racing = new DirectUnixSocketWorkerDispatcher(specWithRunner(defaultRunner)) {
+      override protected def createConnection(
+          socketPath: String): UnixSocketWorkerConnection =
+        new SocketFileConnection(socketPath)
+      override protected def createSessionForWorker(
+          worker: DirectWorkerProcess): WorkerSession = {
+        capturedWorkers.add(worker)
+        readyLatch.countDown()
+        // Block here so dispatcher.close() runs while createSession is in
+        // flight. Use a generous wait so a slow CI doesn't time out.
+        if (!releaseLatch.await(30, java.util.concurrent.TimeUnit.SECONDS)) {
+          fail("releaseLatch never fired -- test orchestration broken")
+        }
+        new StubWorkerSession(worker)
+      }
+    }
+    try {
+      val outcome =
+        new java.util.concurrent.atomic.AtomicReference[Either[Throwable, WorkerSession]]()
+      val createThread = new Thread(() => {
+        try {
+          val s = racing.createSession(None)
+          outcome.set(Right(s))
+        } catch {
+          case t: Throwable => outcome.set(Left(t))
+        }
+      }, "createSession-racer")
+      createThread.start()
+
+      // Wait for thread A to have published the worker and entered the
+      // blocking override.
+      assert(readyLatch.await(10, java.util.concurrent.TimeUnit.SECONDS),
+        "createSession thread never reached createSessionForWorker")
+
+      val closeThread = new Thread(() => racing.close(), "close-racer")
+      closeThread.start()
+      // Give close() time to flip `closed` and iterate workers.
+      Thread.sleep(200)
+
+      // Now release the in-flight createSession.
+      releaseLatch.countDown()
+
+      createThread.join(10000)
+      closeThread.join(10000)
+      assert(!createThread.isAlive, "createSession thread did not finish")
+      assert(!closeThread.isAlive, "close thread did not finish")
+
+      val captured = capturedWorkers.toArray(Array.empty[DirectWorkerProcess])
+      assert(captured.length == 1,
+        s"expected exactly one worker spawned, got ${captured.length}")
+      val worker = captured(0)
+
+      outcome.get() match {
+        case Left(e: IllegalStateException) =>
+          // Contractually allowed, but unreachable with this orchestration:
+          // readyLatch only fires after createSession has cleared both
+          // `closed` checks, so B's close cannot flip `closed` in time for
+          // A to observe it. Kept defensive so a future internal change
+          // that introduces a new window is still covered.
+          assert(e.getMessage.contains("closed"),
+            s"expected dispatcher-closed error, got: ${e.getMessage}")
+        case Left(other) =>
+          fail(s"unexpected exception from racing createSession: $other")
+        case Right(_) =>
+          // close() iterated the published worker and tore it down; the
+          // returned session points at a worker that should now be dead.
+      }
+
+      // Whichever path won, the worker must not still be running and the
+      // socket file must be gone.
+      val deadline = System.currentTimeMillis() + 5000
+      while (worker.process.isAlive && System.currentTimeMillis() < deadline) {
+        Thread.sleep(50)
+      }
+      val sockPath = udsPath(worker)
+      assert(!worker.process.isAlive,
+        s"worker process should be terminated after close, still alive at $sockPath")
+      assert(!new java.io.File(sockPath).exists(),
+        s"socket file $sockPath should have been removed")
+    } finally {
+      releaseLatch.countDown()
+      racing.close()
+    }
+  }
+
+  test("worker-provided graceful timeout is capped at the engine-side maximum") {
+    // The proto documents an engine-configurable maximum (fixed at 30s today).
+    // A 60s spec value should be clamped down.
+    val oversizedProps = UDFWorkerProperties.newBuilder()
+      .setConnection(WorkerConnectionSpec.newBuilder()
+        .setUnixDomainSocket(UnixDomainSocket.getDefaultInstance).build())
+      .setGracefulTerminationTimeoutMs(60000)
+      .build()
+    val spec = UDFWorkerSpecification.newBuilder()
+      .setDirect(DirectWorker.newBuilder()
+        .setRunner(defaultRunner).setProperties(oversizedProps).build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(spec)
+
+    val session = createStubSession()
+    assert(session.workerProcess.gracefulTimeoutMs == 30000L,
+      s"graceful timeout should be capped at 30000ms, " +
+        s"got ${session.workerProcess.gracefulTimeoutMs}")
+    session.close()
+  }
+
+  test("worker-provided init timeout is capped at the engine-side maximum") {
+    val oversizedProps = UDFWorkerProperties.newBuilder()
+      .setConnection(WorkerConnectionSpec.newBuilder()
+        .setUnixDomainSocket(UnixDomainSocket.getDefaultInstance).build())
+      .setInitializationTimeoutMs(60000)
+      .build()
+    val spec = UDFWorkerSpecification.newBuilder()
+      .setDirect(DirectWorker.newBuilder()
+        .setRunner(defaultRunner).setProperties(oversizedProps).build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(spec)
+
+    assert(dispatcher.initTimeoutMs == 30000L,
+      s"init timeout should be capped at 30000ms, got ${dispatcher.initTimeoutMs}")
+  }
+
+  test("createSession after close is rejected") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+    dispatcher.close()
+
+    val ex = intercept[IllegalStateException] {
+      dispatcher.createSession(None)
+    }
+    assert(ex.getMessage.contains("closed"),
+      s"expected dispatcher-closed error, got: ${ex.getMessage}")
+    dispatcher = null
+  }
+
+  test("socket directory is owner-only (0700) on POSIX") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+    // Drive one createSession so a worker (and therefore the socket dir) is
+    // observable via the UDS connection's path.
+    val session = createStubSession()
+    val socketDir: Path = new File(udsPath(session.workerProcess)).toPath.getParent
+    session.close()
+
+    val view = Files.getFileAttributeView(socketDir, classOf[PosixFileAttributeView])
+    // Skip explicitly on non-POSIX filesystems rather than silently pass,
+    // so a CI environment without POSIX attributes is visible in the
+    // test report instead of giving false confidence.
+    assume(view != null, s"POSIX file attributes required to check $socketDir")
+    val perms = view.readAttributes().permissions().asScala.toSet
+    val expected = java.nio.file.attribute.PosixFilePermissions
+      .fromString("rwx------").asScala.toSet
+    assert(perms == expected,
+      s"socket directory $socketDir should be 0700, got ${perms.mkString(",")}")
+  }
+
+  test("socket directory is removed after dispatcher.close") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+    val session = createStubSession()
+    val socketDir = new File(udsPath(session.workerProcess)).toPath.getParent.toFile
+    assert(socketDir.exists(),
+      s"socket directory $socketDir should exist while a session is open")
+    session.close()
+
+    dispatcher.close()
+    dispatcher = null
+
+    assert(!socketDir.exists(),
+      s"socket directory $socketDir should be removed after dispatcher.close")
+  }
+
+  // -- Error-path tests -------------------------------------------------------
+
+  test("worker is cleaned up when createSessionForWorker throws") {
+    // A dispatcher whose createSessionForWorker always throws. The spawned
+    // worker must be terminated rather than leaked until dispatcher.close().
+    var capturedWorker: DirectWorkerProcess = null
+    val failingDispatcher =
+      new DirectUnixSocketWorkerDispatcher(specWithRunner(defaultRunner)) {
+        override protected def createConnection(
+            socketPath: String): UnixSocketWorkerConnection =
+          new SocketFileConnection(socketPath)
+        override protected def createSessionForWorker(
+            worker: DirectWorkerProcess): WorkerSession = {
+          capturedWorker = worker
+          throw new RuntimeException("session creation failed")
+        }
+      }
+
+    try {
+      val ex = intercept[RuntimeException] {
+        failingDispatcher.createSession(None)
+      }
+      assert(ex.getMessage.contains("session creation failed"))
+      assert(capturedWorker != null, "worker should have been spawned before the failure")
+      assert(!capturedWorker.process.isAlive,
+        "worker process should have been terminated after session creation failed")
+      assert(capturedWorker.activeSessions == 0,
+        "worker session count should be released after failure")
+    } finally {
+      failingDispatcher.close()
+    }
+  }
+
+  test("DirectWorker without a connection is rejected") {
+    val badSpec = UDFWorkerSpecification.newBuilder()
+      .setDirect(DirectWorker.newBuilder().setRunner(defaultRunner).build())
+      .build()
+    val ex = intercept[IllegalArgumentException] {
+      new TestDirectWorkerDispatcher(badSpec)
+    }
+    assert(ex.getMessage.contains("connection must be set"),
+      s"expected missing-connection error, got: ${ex.getMessage}")
+  }
+
+  test("DirectWorker with non-UDS transport is rejected") {
+    val tcpProperties = UDFWorkerProperties.newBuilder()
+      .setConnection(WorkerConnectionSpec.newBuilder()
+        .setTcp(LocalTcpConnection.getDefaultInstance).build())
+      .build()
+    val badSpec = UDFWorkerSpecification.newBuilder()
+      .setDirect(DirectWorker.newBuilder()
+        .setRunner(defaultRunner).setProperties(tcpProperties).build())
+      .build()
+    val ex = intercept[IllegalArgumentException] {
+      new TestDirectWorkerDispatcher(badSpec)
+    }
+    assert(ex.getMessage.contains("UNIX domain socket"),
+      s"expected UDS-only error, got: ${ex.getMessage}")
+  }
+
+  test("socket file is cleaned up when createConnection throws") {
+    val capturedSocketPaths = new java.util.concurrent.ConcurrentLinkedQueue[String]()
+    val failingDispatcher =
+      new DirectUnixSocketWorkerDispatcher(specWithRunner(defaultRunner)) {
+        override protected def createConnection(
+            socketPath: String): UnixSocketWorkerConnection = {
+          capturedSocketPaths.add(socketPath)
+          throw new RuntimeException("connection creation failed")
+        }
+        override protected def createSessionForWorker(
+            worker: DirectWorkerProcess): WorkerSession =
+          new StubWorkerSession(worker)
+      }
+    try {
+      val ex = intercept[RuntimeException] {
+        failingDispatcher.createSession(None)
+      }
+      assert(ex.getMessage.contains("connection creation failed"))
+      assert(capturedSocketPaths.size == 1, "createConnection should have been called once")
+      val socketPath = capturedSocketPaths.peek()
+      assert(!new File(socketPath).exists(),
+        s"socket file $socketPath should have been cleaned up")
+    } finally {
+      failingDispatcher.close()
+    }
+  }
+
+  test("empty ProcessCallable command is rejected with a clear error") {
+    val emptyRunner = ProcessCallable.newBuilder().build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(emptyRunner))
+    val ex = intercept[IllegalArgumentException] {
+      dispatcher.createSession(None)
+    }
+    assert(ex.getMessage.contains("at least one entry"),
+      s"expected explicit empty-command error, got: ${ex.getMessage}")
+  }
+
+  test("spawnWorker fails when worker process exits immediately") {
+    val runner = ProcessCallable.newBuilder()
+      .addCommand("bash").addCommand("-c")
+      .addCommand("echo 'fatal: bad config' >&2; exit 42").addCommand("--")
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(runner))
+
+    val ex = intercept[RuntimeException] {
+      dispatcher.createSession(None)
+    }
+    assert(ex.getMessage.contains("exited with code 42"),
+      s"expected early-exit error, got: ${ex.getMessage}")
+    assert(ex.getMessage.contains("fatal: bad config"),
+      s"expected process output in error, got: ${ex.getMessage}")
+  }
+
+  test("spawnWorker times out when worker stays alive but never creates socket") {
+    // Distinct from the "process exits immediately" case: here the worker
+    // process is healthy but simply doesn't bind the socket, so the
+    // dispatcher must time out and SIGKILL-reap it rather than wait forever.
+    val hangingRunner = ProcessCallable.newBuilder()
+      .addCommand("bash").addCommand("-c")
+      .addCommand("while true; do sleep 1; done").addCommand("--")
+      .build()
+    val shortInitProps = UDFWorkerProperties.newBuilder()
+      .setConnection(WorkerConnectionSpec.newBuilder()
+        .setUnixDomainSocket(UnixDomainSocket.getDefaultInstance).build())
+      .setInitializationTimeoutMs(500)
+      .build()
+    val spec = UDFWorkerSpecification.newBuilder()
+      .setDirect(DirectWorker.newBuilder()
+        .setRunner(hangingRunner).setProperties(shortInitProps).build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(spec)
+
+    val ex = intercept[DirectWorkerTimeoutException] {
+      dispatcher.createSession(None)
+    }
+    assert(ex.getMessage.contains("did not create socket"),
+      s"expected init-timeout error, got: ${ex.getMessage}")
+    assert(ex.getMessage.contains("500ms"),
+      s"expected timeout value in error, got: ${ex.getMessage}")
+  }
+
+  // -- Environment lifecycle tests -------------------------------------------
+
+  test("skips installation when verification succeeds") {
+    val markerFile = Files.createTempFile("env-install-marker", ".txt").toFile
+    markerFile.delete()
+
+    val env = WorkerEnvironment.newBuilder()
+      .setEnvironmentVerification(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c").addCommand("exit 0").build())
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(s"touch ${markerFile.getAbsolutePath}").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val session = dispatcher.createSession(None)
+    session.close()
+
+    assert(!markerFile.exists(),
+      "installation should not run when verification succeeds")
+  }
+
+  test("runs installation when verification fails") {
+    val markerFile = Files.createTempFile("env-install-marker", ".txt").toFile
+    markerFile.delete()
+
+    val env = WorkerEnvironment.newBuilder()
+      .setEnvironmentVerification(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c").addCommand("exit 1").build())
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(s"touch ${markerFile.getAbsolutePath}").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val session = dispatcher.createSession(None)
+    session.close()
+
+    assert(markerFile.exists(),
+      "installation should run when verification fails")
+    markerFile.delete()
+  }
+
+  test("runs installation when no verification callable is provided") {
+    val markerFile = Files.createTempFile("env-install-marker", ".txt").toFile
+    markerFile.delete()
+
+    val env = WorkerEnvironment.newBuilder()
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(s"touch ${markerFile.getAbsolutePath}").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val session = dispatcher.createSession(None)
+    session.close()
+
+    assert(markerFile.exists(),
+      "installation should run when no verification is defined")
+    markerFile.delete()
+  }
+
+  test("installation failure throws with process output and prevents worker creation") {
+    val env = WorkerEnvironment.newBuilder()
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand("echo 'missing dependency: libfoo' >&2; exit 7").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val ex = intercept[RuntimeException] {
+      dispatcher.createSession(None)
+    }
+    assert(ex.getMessage.contains("exit code 7"),
+      s"expected installation failure, got: ${ex.getMessage}")
+    assert(ex.getMessage.contains("missing dependency: libfoo"),
+      s"expected process output in error, got: ${ex.getMessage}")
+  }
+
+  test("installation that exceeds callableTimeoutMs is killed and reported") {
+    // Installation sleeps longer than callableTimeoutMs; the dispatcher
+    // must SIGKILL-reap it and surface a "Callable timed out" error
+    // rather than hang the caller.
+    val slowInstall = ProcessCallable.newBuilder()
+      .addCommand("bash").addCommand("-c")
+      .addCommand("sleep 30").build()
+    val env = WorkerEnvironment.newBuilder().setInstallation(slowInstall).build()
+    val shortTimeoutDispatcher =
+      new DirectUnixSocketWorkerDispatcher(specWithEnv(env = env)) {
+        override protected def callableTimeoutMs: Long = 500L
+        override protected def createConnection(
+            socketPath: String): UnixSocketWorkerConnection =
+          new SocketFileConnection(socketPath)
+        override protected def createSessionForWorker(
+            worker: DirectWorkerProcess): WorkerSession =
+          new StubWorkerSession(worker)
+      }
+    try {
+      val ex = intercept[DirectWorkerTimeoutException] {
+        shortTimeoutDispatcher.createSession(None)
+      }
+      assert(ex.getMessage.contains("Callable timed out"),
+        s"expected callable-timeout error, got: ${ex.getMessage}")
+      assert(ex.getMessage.contains("500ms"),
+        s"expected timeout value in error, got: ${ex.getMessage}")
+    } finally {
+      shortTimeoutDispatcher.close()
+    }
+  }
+
+  test("environment setup runs only once across multiple sessions") {
+    val counterFile = Files.createTempFile("env-counter", ".txt").toFile
+    counterFile.delete()
+
+    val env = WorkerEnvironment.newBuilder()
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(s"echo invoked >> ${counterFile.getAbsolutePath}").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val s1 = dispatcher.createSession(None); s1.close()
+    val s2 = dispatcher.createSession(None); s2.close()
+
+    val src = scala.io.Source.fromFile(counterFile)
+    val lines = try src.getLines().toList finally src.close()
+    assert(lines.size == 1,
+      s"installation should run exactly once, but ran ${lines.size} time(s)")
+    counterFile.delete()
+  }
+
+  test("concurrent createSession still installs exactly once") {
+    // The sequential single-install test above cannot catch a missing
+    // lock around ensureEnvironmentReady. Race many createSession calls
+    // with an install script that takes long enough for the threads to
+    // queue on environmentLock, then verify it still ran exactly once.
+    val counterFile = Files.createTempFile("env-concurrent-install", ".txt").toFile
+    counterFile.delete()
+
+    val env = WorkerEnvironment.newBuilder()
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(
+          s"sleep 0.2; echo invoked >> ${counterFile.getAbsolutePath}").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val threads = 4
+    val startGate = new java.util.concurrent.CountDownLatch(1)
+    val doneGate = new java.util.concurrent.CountDownLatch(threads)
+    val sessions = new java.util.concurrent.ConcurrentLinkedQueue[WorkerSession]()
+    val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]()
+
+    (1 to threads).foreach { _ =>
+      new Thread(() => {
+        try {
+          startGate.await()
+          sessions.add(dispatcher.createSession(None))
+        } catch {
+          case t: Throwable => errors.add(t)
+        } finally {
+          doneGate.countDown()
+        }
+      }).start()
+    }
+    startGate.countDown()
+    assert(doneGate.await(30, java.util.concurrent.TimeUnit.SECONDS),
+      "createSession threads did not finish in time")
+    assert(errors.isEmpty,
+      s"unexpected errors during concurrent createSession: ${errors.toArray.mkString(", ")}")
+
+    val src = scala.io.Source.fromFile(counterFile)
+    val lines = try src.getLines().toList finally src.close()
+    assert(lines.size == 1,
+      s"installation should run exactly once under concurrent createSession, " +
+        s"but ran ${lines.size} time(s)")
+
+    sessions.asScala.foreach(_.close())
+    counterFile.delete()
+  }
+
+  test("failed environment setup is not retried on subsequent createSession") {
+    val counterFile = Files.createTempFile("env-failed-counter", ".txt").toFile
+    counterFile.delete()
+
+    // Installation script appends a line every time it runs, then always
+    // fails. The first createSession should run it; the second should be
+    // rejected immediately without re-running.
+    val env = WorkerEnvironment.newBuilder()
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(
+          s"echo invoked >> ${counterFile.getAbsolutePath}; exit 1").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val first = intercept[RuntimeException] { dispatcher.createSession(None) }
+    assert(first.getMessage.contains("installation failed"),
+      s"expected first-attempt installation failure, got: ${first.getMessage}")
+
+    val second = intercept[RuntimeException] { dispatcher.createSession(None) }
+    assert(second.getMessage.contains("previously failed"),
+      s"expected cached failure on retry, got: ${second.getMessage}")
+
+    val src = scala.io.Source.fromFile(counterFile)
+    val lines = try src.getLines().toList finally src.close()
+    assert(lines.size == 1,
+      s"installation should run only once across failed retries, got ${lines.size}")
+    counterFile.delete()
+  }
+
+  test("installation timeout transitions to Failed and is not retried") {
+    val counterFile = Files.createTempFile("env-timeout-counter", ".txt").toFile
+    counterFile.delete()
+
+    // Install appends to a counter file, then sleeps past callableTimeoutMs
+    // so runCallable times out. The dispatcher must mark the env Failed
+    // and reject the next createSession without re-running install.
+    val env = WorkerEnvironment.newBuilder()
+      .setInstallation(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(
+          s"echo invoked >> ${counterFile.getAbsolutePath}; sleep 30").build())
+      .build()
+    val timeoutDispatcher =
+      new DirectUnixSocketWorkerDispatcher(specWithEnv(env = env)) {
+        override protected def callableTimeoutMs: Long = 500L
+        override protected def createConnection(
+            socketPath: String): UnixSocketWorkerConnection =
+          new SocketFileConnection(socketPath)
+        override protected def createSessionForWorker(
+            worker: DirectWorkerProcess): WorkerSession =
+          new StubWorkerSession(worker)
+      }
+    try {
+      val first = intercept[DirectWorkerTimeoutException] {
+        timeoutDispatcher.createSession(None)
+      }
+      assert(first.getMessage.contains("Callable timed out"),
+        s"expected callable-timeout error, got: ${first.getMessage}")
+
+      val second = intercept[DirectWorkerException] {
+        timeoutDispatcher.createSession(None)
+      }
+      assert(second.getMessage.contains("previously failed"),
+        s"expected cached failure on retry, got: ${second.getMessage}")
+
+      val src = scala.io.Source.fromFile(counterFile)
+      val lines = try src.getLines().toList finally src.close()
+      assert(lines.size == 1,
+        s"installation should run only once across timed-out retries, got ${lines.size}")
+    } finally {
+      timeoutDispatcher.close()
+      counterFile.delete()
+    }
+  }
+
+  test("non-None securityScope is rejected until pooling lands") {
+    dispatcher = new TestDirectWorkerDispatcher(specWithRunner(defaultRunner))
+    val scope = new WorkerSecurityScope {
+      override def equals(obj: Any): Boolean = obj.isInstanceOf[this.type]
+      override def hashCode(): Int = 0
+    }
+    val ex = intercept[IllegalArgumentException] {
+      dispatcher.createSession(Some(scope))
+    }
+    assert(ex.getMessage.contains("not supported yet"),
+      s"expected unsupported-scope error, got: ${ex.getMessage}")
+  }
+
+  test("verification without installation is rejected") {
+    val env = WorkerEnvironment.newBuilder()
+      .setEnvironmentVerification(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c").addCommand("exit 0").build())
+      .build()
+    val ex = intercept[IllegalArgumentException] {
+      new TestDirectWorkerDispatcher(specWithEnv(env = env))
+    }
+    assert(ex.getMessage.contains("installation"),
+      s"expected installation-required error, got: ${ex.getMessage}")
+  }
+
+  test("cleanup runs on dispatcher close") {
+    val cleanupMarker = Files.createTempFile("env-cleanup-marker", ".txt").toFile
+    cleanupMarker.delete()
+
+    val env = WorkerEnvironment.newBuilder()
+      .setEnvironmentCleanup(ProcessCallable.newBuilder()
+        .addCommand("bash").addCommand("-c")
+        .addCommand(s"touch ${cleanupMarker.getAbsolutePath}").build())
+      .build()
+    dispatcher = new TestDirectWorkerDispatcher(specWithEnv(env = env))
+
+    val session = dispatcher.createSession(None)
+    session.close()
+
+    assert(!cleanupMarker.exists(),
+      "cleanup should not run until dispatcher is closed")
+
+    dispatcher.close()
+    dispatcher = null
+
+    assert(cleanupMarker.exists(),
+      "cleanup should run when dispatcher is closed")
+    cleanupMarker.delete()
+  }
+}
diff --git a/udf/worker/proto/pom.xml b/udf/worker/proto/pom.xml
index 8a2c604ef1d2f..894dcce9fb55d 100644
--- a/udf/worker/proto/pom.xml
+++ b/udf/worker/proto/pom.xml
@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 
diff --git a/udf/worker/proto/src/main/protobuf/common.proto b/udf/worker/proto/src/main/protobuf/common.proto
index 9c50cdd7a7e4b..ee032def73efe 100644
--- a/udf/worker/proto/src/main/protobuf/common.proto
+++ b/udf/worker/proto/src/main/protobuf/common.proto
@@ -32,6 +32,13 @@ enum UDFWorkerDataFormat {
 }
 
 // The UDF execution type/shape.
+//
+// BIDIRECTIONAL_STREAMING is the only pattern supported by the engine for
+// now. It may be possible to express all UDF types (scalar, mapPartitions,
+// and eventually UDAF/UDTF/streaming) on top of this single pattern by
+// framing their phases as messages on the stream, but that is a design
+// question worth revisiting as additional UDF types are added -- for
+// example, aggregation may prefer a multi-round or specialized pattern.
 enum UDFProtoCommunicationPattern {
     UDF_PROTO_COMMUNICATION_PATTERN_UNSPECIFIED = 0;
 
diff --git a/udf/worker/proto/src/main/protobuf/worker_spec.proto b/udf/worker/proto/src/main/protobuf/worker_spec.proto
index f2eacf2b3ce35..83dac4f962e5f 100644
--- a/udf/worker/proto/src/main/protobuf/worker_spec.proto
+++ b/udf/worker/proto/src/main/protobuf/worker_spec.proto
@@ -140,13 +140,17 @@ message WorkerCapabilities {
     // Whether multiple, concurrent UDF
     // connections are supported by this worker
     // (for example via multi-threading).
-    // 
+    //
     // In the first implementation of the engine-side
     // worker specification, this property will not be used.
-    // 
+    //
     // Usage of this property can be enabled in the future if the
     // engine implements more advanced resource management (TBD).
     //
+    // TODO: wire this into planning/scheduling -- SPIP worker-spec §2.4
+    // "Parallelism" describes the intended use (e.g., multiplex tasks onto
+    // a single worker vs. spawn multiple workers per executor).
+    //
     // (Optional)
     optional bool supports_concurrent_udfs = 3;
 
@@ -190,25 +194,31 @@ message UDFWorkerProperties {
     // (Optional)
     optional int32 graceful_termination_timeout_ms = 2;
 
-    // The connection this [[DirectWorker]] supports. Note that a single
-    // connection is sufficient to run multiple UDFs and (gRPC) services.
+    // A [[DirectWorker]] exposes one server-side connection endpoint (a
+    // UDS path or a TCP port) that all sessions on the worker share.
+    // Multi-connection workers (e.g., separate data and control channels)
+    // are not supported in this release.
+    //
+    // On [[DirectWorker]] creation, connection information
+    // is passed to the callable as a string parameter.
+    // The string format depends on the [[WorkerConnectionSpec]]:
     //
-    // On [[DirectWorker]] creation, connection information 
-    // is passed to the callable as a string parameter. 
-    // The string format depends on the [[WorkerConnection]]:
-    // 
     // For example, when using TCP, the callable argument will be:
     // --connection PORT
     // Here is a concrete example
     // --connection 8080
-    // 
+    //
     // For the format of each specific transport type, see the comments below.
     //
     // (Required)
-    WorkerConnection connection = 3;
+    WorkerConnectionSpec connection = 3;
 }
 
-message WorkerConnection {
+// Describes one connection (transport endpoint) that a [[DirectWorker]]
+// exposes. This is a configuration message -- the live transport object
+// used by the engine at runtime is the Scala abstraction
+// `org.apache.spark.udf.worker.core.WorkerConnection`.
+message WorkerConnectionSpec {
     // (Required)
     oneof transport {
         UnixDomainSocket unix_domain_socket = 1;
@@ -275,7 +285,7 @@ message ProcessCallable {
     // 
     // --connection
     // The value of the connection argument is a string with
-    // engine-assinged connection parameters. See [[UDFWorkerProperties]]
+    // engine-assigned connection parameters. See [[UDFWorkerProperties]]
     // for details.
     //
     // (Optional)