diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34d4846c1e83d..ac415d279f215 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,13 +30,10 @@ jobs: include: - name: "core / utils / tags" slug: "core-utils-tags" - modules: ":spark-core_2.13,:spark-launcher_2.13,:spark-network-common_2.13,:spark-network-shuffle_2.13,:spark-network-yarn_2.13,:spark-unsafe_2.13,:spark-kvstore_2.13,:spark-tags_2.13,:spark-sketch_2.13,:spark-common-utils_2.13" - - name: "graphx / examples / repl" - slug: "graphx-examples-repl" - modules: ":spark-graphx_2.13,:spark-examples_2.13,:spark-repl_2.13" - - name: "catalyst / sql-api / hive-thriftserver" - slug: "catalyst-sql-api-hive-thriftserver" - modules: ":spark-sql-api_2.13,:spark-catalyst_2.13,:spark-hive-thriftserver_2.13" + modules: ":spark-core_2.13,:spark-launcher_2.13,:spark-network-common_2.13,:spark-network-shuffle_2.13,:spark-network-yarn_2.13,:spark-unsafe_2.13,:spark-kvstore_2.13,:spark-tags_2.13,:spark-sketch_2.13,:spark-common-utils_2.13,:spark-common-utils-java_2.13,:spark-udf-worker-core_2.13" + - name: "catalyst / sql-api / hive-thriftserver / pipelines / graphx / examples / repl" + slug: "catalyst-graphx" + modules: ":spark-sql-api_2.13,:spark-catalyst_2.13,:spark-hive-thriftserver_2.13,:spark-pipelines_2.13,:spark-graphx_2.13,:spark-examples_2.13,:spark-repl_2.13" - name: "sql - extended tests" slug: "sql" modules: ":spark-sql_2.13" @@ -52,10 +49,19 @@ jobs: - name: "hive" slug: "hive" modules: ":spark-hive_2.13" - - name: "streaming / mllib / yarn / k8s / connect / protobuf / kafka / avro" - slug: "streaming-mllib-yarn-k8s-connect-protobuf-kafka-avro" - modules: 
":spark-streaming_2.13,:spark-sql-kafka-0-10_2.13,:spark-streaming-kafka-0-10_2.13,:spark-token-provider-kafka-0-10_2.13,:spark-mllib-local_2.13,:spark-mllib_2.13,:spark-yarn_2.13,:spark-kubernetes_2.13,:spark-hadoop-cloud_2.13,:spark-connect_2.13,:spark-connect-common_2.13,:spark-connect-client-jvm_2.13,:spark-protobuf_2.13,:spark-avro_2.13,:spark-assembly_2.13" + - name: "mllib" + slug: "mllib" + modules: ":spark-mllib-local_2.13,:spark-mllib_2.13" + - name: "connect / protobuf" + slug: "connect-protobuf" + modules: ":spark-connect_2.13,:spark-connect-common_2.13,:spark-connect-client-jvm_2.13,:spark-connect-client-jdbc_2.13,:spark-protobuf_2.13" extra: -Dtest.exclude.tags=org.apache.spark.tags.AmmoniteTest + - name: "streaming / kafka / avro" + slug: "streaming-kafka-avro" + modules: ":spark-streaming_2.13,:spark-sql-kafka-0-10_2.13,:spark-streaming-kafka-0-10_2.13,:spark-token-provider-kafka-0-10_2.13,:spark-avro_2.13" + - name: "yarn / k8s / hadoop-cloud / assembly" + slug: "yarn-k8s-hadoop-cloud-assembly" + modules: ":spark-yarn_2.13,:spark-kubernetes_2.13,:spark-hadoop-cloud_2.13,:spark-assembly_2.13" steps: - uses: actions/checkout@v6 @@ -77,8 +83,8 @@ jobs: run: | python3 -m pip install --upgrade pip python3 -m pip install 'numpy>=1.20.0' 'pyarrow' 'pandas' 'scipy' \ - 'unittest-xml-reporting' 'grpcio==1.56.0' 'protobuf==4.25.3' \ - 'grpcio-status==1.56.0' 'googleapis-common-protos==1.56.4' \ + 'unittest-xml-reporting' 'grpcio==1.76.0' 'protobuf==6.33.5' \ + 'grpcio-status==1.76.0' 'googleapis-common-protos==1.71.0' \ 'zstandard==0.25.0' - name: Build dependent modules (compile main+tests, install incl. 
test-jars) @@ -149,23 +155,19 @@ jobs: matrix: include: - name: sql - modules: pyspark-sql,pyspark-resource,pyspark-testing - - name: core - modules: pyspark-core,pyspark-streaming + modules: pyspark-sql,pyspark-resource,pyspark-testing,pyspark-core,pyspark-errors,pyspark-logger - name: ml - modules: pyspark-mllib,pyspark-ml + modules: pyspark-mllib,pyspark-ml,pyspark-ml-connect,pyspark-pipelines + - name: streaming + modules: pyspark-streaming,pyspark-structured-streaming,pyspark-structured-streaming-connect + - name: connect + modules: pyspark-connect - name: pandas modules: pyspark-pandas - name: pandas-slow modules: pyspark-pandas-slow - - name: connect - modules: pyspark-connect - - name: pandas-connect - modules: pyspark-pandas-connect - - name: pandas-slow-connect - modules: pyspark-pandas-slow-connect - - name: errors - modules: pyspark-errors + - name: pandas-connect-and-slow + modules: pyspark-pandas-connect,pyspark-pandas-slow-connect env: MODULES_TO_TEST: ${{ matrix.modules }} PYTHON_TO_TEST: python3.10 @@ -192,11 +194,12 @@ jobs: 'numpy==1.26.4' 'pyarrow==18.0.0' 'pandas==2.2.0' 'scipy' \ 'unittest-xml-reporting' 'coverage' \ 'memory-profiler' 'plotly<6' 'matplotlib' \ - 'grpcio==1.56.0' 'grpcio-status==1.56.0' \ - 'protobuf==4.25.3' 'googleapis-common-protos==1.56.4' \ + 'grpcio==1.76.0' 'grpcio-status==1.76.0' \ + 'protobuf==6.33.5' 'googleapis-common-protos==1.71.0' \ 'graphviz>=0.20' 'openpyxl' \ 'scikit-learn==1.1.*' 'mlflow==3.12.0' \ - 'torch==2.0.1' 'torchvision==0.15.2' 'torcheval' + 'torch==2.5.1' 'torchvision==0.20.1' 'torcheval' \ + 'zstandard==0.25.0' - name: Build Spark (full reactor including assembly) env: diff --git a/AGENTS.md b/AGENTS.md index 28272d19fe933..89c8a77d51a00 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,7 +22,7 @@ Avoid introducing non-ASCII characters in code or comments. String literals may ## Build and Test -Build and tests can take a long time. Before running tests, ask the user if they have more changes to make. 
+Build and tests can take a long time. If the user explicitly asked to run tests, run them. Otherwise (you are running tests on your own to verify a change), first ask the user if they have more changes to make. Prefer SBT over Maven for faster incremental compilation. Module names are defined in `project/SparkBuild.scala`. @@ -128,3 +128,9 @@ DO NOT push to the upstream repo. Always push to the personal fork. Open PRs aga DO NOT force push or use `--amend` on pushed commits unless the user explicitly asks. If the remote branch has new commits, fetch and rebase before pushing. Always get user approval before external operations such as pushing commits, creating PRs, or posting comments. Use `gh pr create` to open PRs. If `gh` is not installed, generate the GitHub PR URL for the user and recommend installing the GitHub CLI. + +## Security + +Security model: [SECURITY.md](./SECURITY.md) + +Agents that scan this repository should consult `SECURITY.md` for the project's threat model, in-scope / out-of-scope declarations, and known non-findings before reporting issues. diff --git a/R/.gitignore b/R/.gitignore index c98504ab07781..693b2b7c69175 100644 --- a/R/.gitignore +++ b/R/.gitignore @@ -6,3 +6,7 @@ pkg/man pkg/html SparkR.Rcheck/ SparkR_*.tar.gz +# Transient copies made by dev/make-distribution.sh while building the package. +pkg/LICENSE +pkg/NOTICE +DESCRIPTION.orig diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 7bbcee567c608..78283891dea81 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 4.2.0.1-4.3.0-0 +Version: 4.2.0.1-4.3.0-1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/README.md b/README.md index 44a99a8231d31..e4476174203b8 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,6 @@ This README file only contains basic setup instructions. 
| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14_nogil.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14_nogil.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml) | -| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect40.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect40.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_sparkr_window.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_sparkr_window.yml) | diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000000..26b540d81ab53 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,13 @@ +# Security policy + +## Reporting a vulnerability + +To report a security vulnerability in Apache Spark, follow the ASF security process at <https://www.apache.org/security/>. Reports can be sent privately to security@apache.org or to private@spark.apache.org. 
+ +## Threat model + +Apache Spark's security guidance for operators — covering authentication, encryption, access control, Kerberos integration, trust boundaries, environmental assumptions, security properties provided and not provided, and downstream operator responsibilities — is documented at: + +<https://spark.apache.org/docs/latest/security.html> + +Automated security scanning agents should consult that document for the project's in-scope / out-of-scope declarations before reporting issues. diff --git a/assembly/pom.xml b/assembly/pom.xml index 440677b45e80f..aba28e2cf858d 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index f1c5ea216d47e..6a936b31f28a0 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 63cb28d42d583..72dc7bef3b5f3 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java index 2edeb3f05c9b0..64edc0edae6f3 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java @@ -23,6 +23,7 @@ import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeFalse; public class JavaUtilsSuite { @@ -52,6 +53,10 @@ public void testCreateDirectory() throws IOException { // 4. 
The parent directory cannot write assertTrue(testDir.canWrite()); assertTrue(testDir.setWritable(false)); + // Skip when setWritable(false) has no effect (e.g. running as root, + // or on a filesystem that ignores POSIX write bits). + assumeFalse(testDir.canWrite(), + "setWritable(false) had no effect; skipping write-denied scenario"); assertThrows(IOException.class, () -> JavaUtils.createDirectory(testDirPath, "scenario4")); assertTrue(testDir.setWritable(true)); diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 317499a1b116f..9b5a916587056 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index ca6631888edae..cf85e7577a759 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ff6d2c6b4859e..d5d9a986d664d 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 3a6ce00a2c557..b49d6baa14607 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index f23ca99dbdff6..55bb994fa9b15 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java 
b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index 3e2bfbcd87ca3..8df59b1f6e342 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -110,7 +110,7 @@ public record CollationMeta( public static class Collation { public final String collationName; public final String provider; - private final Collator collator; + private final ThreadLocal threadLocalCollator; public final Comparator comparator; /** @@ -187,7 +187,7 @@ public static class Collation { public Collation( String collationName, String provider, - Collator collator, + ThreadLocal threadLocalCollator, Comparator comparator, String version, Function sortKeyFunction, @@ -197,7 +197,7 @@ public Collation( boolean supportsSpaceTrimming) { this.collationName = collationName; this.provider = provider; - this.collator = collator; + this.threadLocalCollator = threadLocalCollator; this.comparator = comparator; this.version = version; this.sortKeyFunction = sortKeyFunction; @@ -216,7 +216,7 @@ public Collation( } public Collator getCollator() { - return collator; + return threadLocalCollator != null ? threadLocalCollator.get() : null; } /** @@ -1016,29 +1016,40 @@ protected Collation buildCollation() { builder.setUnicodeLocaleKeyword("ks", "level1"); } ULocale resultLocale = builder.build(); - Collator collator = Collator.getInstance(resultLocale); - // Freeze ICU collator to ensure thread safety. - collator.freeze(); + + // Use thread-local Collator instances to avoid lock contention. + // A frozen RuleBasedCollator serializes all threads through a ReentrantLock on its + // internal collation buffer (used by getCollationKey/compare). By creating independent + // per-thread instances via Collator.getInstance(), each thread operates on its own + // buffer without locking. 
Each instance is frozen as a mutation guard so that any + // accidental call to setStrength() or similar throws immediately. + ThreadLocal threadLocalCollator = ThreadLocal.withInitial( + () -> { + Collator collator = Collator.getInstance(resultLocale); + collator.freeze(); + return collator; + }); Comparator comparator; Function sortKeyFunction; if (spaceTrimming == SpaceTrimming.NONE) { comparator = (s1, s2) -> - collator.compare(s1.toValidString(), s2.toValidString()); - sortKeyFunction = s -> collator.getCollationKey(s.toValidString()).toByteArray(); + threadLocalCollator.get().compare(s1.toValidString(), s2.toValidString()); + sortKeyFunction = s -> + threadLocalCollator.get().getCollationKey(s.toValidString()).toByteArray(); } else { - comparator = (s1, s2) -> collator.compare( + comparator = (s1, s2) -> threadLocalCollator.get().compare( applyTrimmingPolicy(s1, spaceTrimming).toValidString(), applyTrimmingPolicy(s2, spaceTrimming).toValidString()); - sortKeyFunction = s -> collator.getCollationKey( + sortKeyFunction = s -> threadLocalCollator.get().getCollationKey( applyTrimmingPolicy(s, spaceTrimming).toValidString()).toByteArray(); } return new Collation( normalizedCollationName(), PROVIDER_ICU, - collator, + threadLocalCollator, comparator, ICU_VERSION, sortKeyFunction, diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/BinaryView.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/BinaryView.java new file mode 100644 index 0000000000000..998fff77f29e1 --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/BinaryView.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.unsafe.types; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.KryoSerializable; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ByteBuffer; + +import org.apache.spark.annotation.Unstable; +import org.apache.spark.unsafe.Platform; +import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.hash.Murmur3_x86_32; + +import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET; + +/** + * A non-owning view over a contiguous chunk of bytes that may live on-heap or off-heap. + * It is intended as the shared physical carrier for opaque-bytes SQL types whose values + * can be read directly out of an {@code UnsafeRow} / {@code UnsafeArrayData} / + * {@code ColumnVector} backing buffer. + *

+ * Lifetime: a {@code BinaryView} is only valid for as long as the memory it points to is
+ * alive. Callers that need to retain a value across the source buffer's lifetime must call
+ * {@link #copy()} first.
+ */
+@Unstable
+public final class BinaryView implements Comparable<BinaryView>, Externalizable, KryoSerializable {
+
+  // null when off-heap; a byte[] (or other primitive array) when on-heap.
+  private Object base;
+  // For on-heap, this is BYTE_ARRAY_OFFSET + index into the array. For off-heap, this is
+  // the absolute native address.
+  private long offset;
+  private int numBytes;
+
+  /** For Externalizable / KryoSerializable only. */
+  public BinaryView() {}
+
+  private BinaryView(Object base, long offset, int numBytes) {
+    this.base = base;
+    this.offset = offset;
+    this.numBytes = numBytes;
+  }
+
+  // ---------- factories ----------
+
+  /**
+   * Creates a view over the given byte array. The array is referenced, not copied; callers
+   * must not mutate it while the returned view is in use.
+   */
+  public static BinaryView fromBytes(byte[] bytes) {
+    if (bytes == null) return null;
+    return new BinaryView(bytes, BYTE_ARRAY_OFFSET, bytes.length);
+  }
+
+  /** Creates a view over a sub-range of the given byte array (no copy). */
+  public static BinaryView fromBytes(byte[] bytes, int offset, int numBytes) {
+    if (bytes == null) return null;
+    return new BinaryView(bytes, BYTE_ARRAY_OFFSET + offset, numBytes);
+  }
+
+  /**
+   * Creates a view at the given Tungsten-style address. {@code base == null} means off-heap
+   * and {@code offset} is the absolute native address; otherwise {@code base} is a JVM
+   * primitive array and {@code offset} is {@code BYTE_ARRAY_OFFSET + index}.
+   */
+  public static BinaryView fromAddress(Object base, long offset, int numBytes) {
+    return new BinaryView(base, offset, numBytes);
+  }
+
+  // ---------- accessors ----------
+
+  /** The backing object: a primitive array when on-heap, or {@code null} when off-heap. */
+  public Object getBaseObject() { return base; }
+
+  /** Tungsten-style offset: see the class javadoc. */
+  public long getBaseOffset() { return offset; }
+
+  public int numBytes() { return numBytes; }
+
+  public boolean isOffHeap() { return base == null; }
+
+  // ---------- random-access primitive reads ----------
+  // Coordinates are relative to the start of this view, i.e. i in [0, numBytes).
+
+  public byte getByte(int i) {
+    assert i >= 0 && i < numBytes : invalidRangeMessage(i, 1);
+    return Platform.getByte(base, offset + i);
+  }
+
+  public short getShort(int i) {
+    assert i >= 0 && i + 2 <= numBytes : invalidRangeMessage(i, 2);
+    return Platform.getShort(base, offset + i);
+  }
+
+  public int getInt(int i) {
+    assert i >= 0 && i + 4 <= numBytes : invalidRangeMessage(i, 4);
+    return Platform.getInt(base, offset + i);
+  }
+
+  public long getLong(int i) {
+    assert i >= 0 && i + 8 <= numBytes : invalidRangeMessage(i, 8);
+    return Platform.getLong(base, offset + i);
+  }
+
+  public float getFloat(int i) {
+    assert i >= 0 && i + 4 <= numBytes : invalidRangeMessage(i, 4);
+    return Platform.getFloat(base, offset + i);
+  }
+
+  public double getDouble(int i) {
+    assert i >= 0 && i + 8 <= numBytes : invalidRangeMessage(i, 8);
+    return Platform.getDouble(base, offset + i);
+  }
+
+  private String invalidRangeMessage(int i, int width) {
+    return "Invalid access at offset " + i + " (width " + width + ") in BinaryView of " +
+      numBytes + " bytes";
+  }
+
+  // ---------- materialization and slicing ----------
+
+  /**
+   * Returns true iff this view owns a tight, on-heap {@code byte[]}: the offset is exactly
+   * {@code BYTE_ARRAY_OFFSET} and the array length equals {@link #numBytes()}. In that case
+   * {@link #getBytes()} returns the live backing array, so mutating it writes through to this
+   * view; otherwise {@code getBytes()} returns a fresh copy. Sliced, sub-range, and off-heap
+   * views are never tight on-heap arrays.
+   */
+  public boolean hasTightOnHeapArray() {
+    return offset == BYTE_ARRAY_OFFSET
+      && base instanceof byte[] bytes
+      && bytes.length == numBytes;
+  }
+
+  /**
+   * Returns the bytes of this view as a {@code byte[]}.
+   * <p>
+   * Mirrors {@link UTF8String#getBytes()}: if this view already owns a tight, on-heap byte
+   * array (see {@link #hasTightOnHeapArray()}), the backing array is returned directly
+   * without copying. Otherwise a fresh array is allocated and the bytes are copied into it.
+   * <p>
+   * The caller must not mutate the returned array, since when this view owns a tight array
+   * it is shared with the view itself. Use {@link #copy()} to obtain an independent owned
+   * value.
+   */
+  public byte[] getBytes() {
+    if (hasTightOnHeapArray()) {
+      return (byte[]) base;
+    }
+    byte[] out = new byte[numBytes];
+    Platform.copyMemory(base, offset, out, BYTE_ARRAY_OFFSET, numBytes);
+    return out;
+  }
+
+  /**
+   * Returns an independent {@code BinaryView} that owns a fresh on-heap byte array
+   * containing this view's data. Use this before storing the value past the source
+   * buffer's lifetime.
+   */
+  public BinaryView copy() {
+    return new BinaryView(copyToNewArray(), BYTE_ARRAY_OFFSET, numBytes);
+  }
+
+  private byte[] copyToNewArray() {
+    byte[] out = new byte[numBytes];
+    Platform.copyMemory(base, offset, out, BYTE_ARRAY_OFFSET, numBytes);
+    return out;
+  }
+
+  /** Returns a sub-view (no copy). */
+  public BinaryView slice(int start, int len) {
+    assert start >= 0 && len >= 0 && start + len <= numBytes
+      : "Invalid slice start=" + start + " len=" + len + " of view with " + numBytes + " bytes";
+    return new BinaryView(base, offset + start, len);
+  }
+
+  /**
+   * Copies this view's bytes to the given target memory address. Used by writers that
+   * already know where the bytes should land (e.g. {@code UnsafeWriter}).
+   */
+  public void writeToMemory(Object target, long targetOffset) {
+    Platform.copyMemory(base, offset, target, targetOffset, numBytes);
+  }
+
+  /**
+   * Wraps this view as a {@link ByteBuffer}. The heap path returns a {@code ByteBuffer.wrap}
+   * around the existing array (zero-copy); the off-heap path materializes a fresh array
+   * because there is no portable way to expose an off-heap address through the public
+   * {@code ByteBuffer} API.
+   */
+  public ByteBuffer toByteBuffer() {
+    if (base instanceof byte[] bytes && offset >= BYTE_ARRAY_OFFSET) {
+      long arrayOffset = offset - BYTE_ARRAY_OFFSET;
+      if ((long) bytes.length < arrayOffset + numBytes) {
+        throw new ArrayIndexOutOfBoundsException();
+      }
+      return ByteBuffer.wrap(bytes, (int) arrayOffset, numBytes);
+    }
+    return ByteBuffer.wrap(copyToNewArray());
+  }
+
+  // ---------- equality / hashing / ordering ----------
+
+  @Override
+  public int hashCode() {
+    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, 42);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) return true;
+    if (other instanceof BinaryView o) {
+      return numBytes == o.numBytes
+        && ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
+    }
+    return false;
+  }
+
+  /** Lexicographic, unsigned byte-wise comparison. */
+  @Override
+  public int compareTo(BinaryView other) {
+    return ByteArray.compareBinary(
+      base, offset, numBytes, other.base, other.offset, other.numBytes);
+  }
+
+  // ---------- serialization ----------
+  // Both paths always materialize an on-heap byte[] on read so that the deserialized view
+  // owns its data; senders may pass a view into another buffer.
+
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeInt(numBytes);
+    if (numBytes > 0) {
+      if (hasTightOnHeapArray()) {
+        out.write((byte[]) base);
+      } else {
+        out.write(copyToNewArray());
+      }
+    }
+  }
+
+  @Override
+  public void readExternal(ObjectInput in) throws IOException {
+    int n = in.readInt();
+    byte[] bytes = new byte[n];
+    in.readFully(bytes);
+    this.base = bytes;
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = n;
+  }
+
+  @Override
+  public void write(Kryo kryo, Output out) {
+    out.writeInt(numBytes);
+    if (numBytes > 0) {
+      if (hasTightOnHeapArray()) {
+        out.write((byte[]) base);
+      } else {
+        out.write(copyToNewArray());
+      }
+    }
+  }
+
+  @Override
+  public void read(Kryo kryo, Input in) {
+    int n = in.readInt();
+    byte[] bytes = new byte[n];
+    in.readBytes(bytes);  // must fill all n bytes or throw; read(byte[]) may stop short
+    this.base = bytes;
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = n;
+  }
+}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
deleted file mode 100644
index 48b121ba894a5..0000000000000
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.unsafe.types; - -import org.apache.spark.annotation.Unstable; - -import java.io.Serializable; - -// This class represents the physical type for the GEOGRAPHY data type. -@Unstable -public final class GeographyVal implements Comparable, Serializable { - - // The GEOGRAPHY type is implemented as a byte array. We provide `getBytes` and `fromBytes` - // methods for readers and writers to access this underlying array of bytes. - private final byte[] value; - - // We make the constructor private. We should use `fromBytes` to create new instances. - private GeographyVal(byte[] value) { - this.value = value; - } - - public byte[] getBytes() { - return value; - } - - public static GeographyVal fromBytes(byte[] bytes) { - if (bytes == null) { - return null; - } else { - return new GeographyVal(bytes); - } - } - - // Comparison is not yet supported for GEOGRAPHY. - public int compareTo(GeographyVal g) { - throw new UnsupportedOperationException(); - } -} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java deleted file mode 100644 index 381d3e25c68af..0000000000000 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.unsafe.types; - -import org.apache.spark.annotation.Unstable; - -import java.io.Serializable; - -// This class represents the physical type for the GEOMETRY data type. -@Unstable -public final class GeometryVal implements Comparable, Serializable { - - // The GEOMETRY type is implemented as a byte array. We provide `getBytes` and `fromBytes` - // methods for readers and writers to access this underlying array of bytes. - private final byte[] value; - - // We make the constructor private. We should use `fromBytes` to create new instances. - private GeometryVal(byte[] value) { - this.value = value; - } - - public byte[] getBytes() { - return value; - } - - public static GeometryVal fromBytes(byte[] bytes) { - if (bytes == null) { - return null; - } else { - return new GeometryVal(bytes); - } - } - - // Comparison is not yet supported for GEOMETRY. - public int compareTo(GeometryVal g) { - throw new UnsupportedOperationException(); - } -} diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/BinaryViewSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/BinaryViewSuite.java new file mode 100644 index 0000000000000..1e1216ffec76d --- /dev/null +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/BinaryViewSuite.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.unsafe.types; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; + +import org.junit.jupiter.api.Test; + +import org.apache.spark.unsafe.Platform; +import org.apache.spark.unsafe.memory.MemoryAllocator; +import org.apache.spark.unsafe.memory.MemoryBlock; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class BinaryViewSuite { + + private static final byte[] DATA = new byte[] { 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 }; + + @Test + public void nullFactoriesReturnNull() { + assertNull(BinaryView.fromBytes(null)); + assertNull(BinaryView.fromBytes(null, 0, 0)); + } + + @Test + public void onHeapFromBytesReferencesBackingArray() { + BinaryView v = BinaryView.fromBytes(DATA); + assertEquals(DATA.length, v.numBytes()); + 
assertFalse(v.isOffHeap()); + assertSame(DATA, v.getBaseObject()); + // getBytes() must return the backing array when the view owns a tight, on-heap byte[]. + assertSame(DATA, v.getBytes()); + } + + @Test + public void sliceOfHeapBytes() { + BinaryView full = BinaryView.fromBytes(DATA); + BinaryView mid = full.slice(2, 4); + assertEquals(4, mid.numBytes()); + // Slice shares the backing array but is not a tight owner, so getBytes() must copy. + assertArrayEquals(new byte[] { 30, 40, 50, 60 }, mid.getBytes()); + assertNotSame(DATA, mid.getBytes()); + // Range reads use slice-relative coordinates. + assertEquals(30, mid.getByte(0)); + assertEquals(60, mid.getByte(3)); + } + + @Test + public void hasTightOnHeapArray() { + // A view that owns the whole array is a tight on-heap owner. + assertTrue(BinaryView.fromBytes(DATA).hasTightOnHeapArray()); + // A sub-range view is not, even when on-heap. + assertFalse(BinaryView.fromBytes(DATA, 2, 4).hasTightOnHeapArray()); + assertFalse(BinaryView.fromBytes(DATA).slice(0, DATA.length - 1).hasTightOnHeapArray()); + // copy() always produces a tight on-heap owner. + assertTrue(BinaryView.fromBytes(DATA, 2, 4).copy().hasTightOnHeapArray()); + + MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length); + try { + Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET, + null, block.getBaseOffset(), DATA.length); + BinaryView offHeap = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length); + // An off-heap view is never a tight on-heap owner. 
+ assertFalse(offHeap.hasTightOnHeapArray()); + assertTrue(offHeap.copy().hasTightOnHeapArray()); + } finally { + MemoryAllocator.UNSAFE.free(block); + } + } + + @Test + public void primitiveReaders() { + byte[] bytes = new byte[16]; + Platform.putInt(bytes, Platform.BYTE_ARRAY_OFFSET, 0xCAFEBABE); + Platform.putLong(bytes, Platform.BYTE_ARRAY_OFFSET + 8, 0x1234567890ABCDEFL); + BinaryView v = BinaryView.fromBytes(bytes); + assertEquals(0xCAFEBABE, v.getInt(0)); + assertEquals(0x1234567890ABCDEFL, v.getLong(8)); + } + + @Test + public void copyIsIndependent() { + byte[] bytes = DATA.clone(); + BinaryView v = BinaryView.fromBytes(bytes); + BinaryView c = v.copy(); + assertNotSame(v.getBaseObject(), c.getBaseObject()); + assertArrayEquals(DATA, c.getBytes()); + // Mutating the source must not affect the copy. + bytes[0] = 99; + assertEquals(10, c.getBytes()[0]); + } + + @Test + public void offHeapView() { + MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length); + try { + Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET, + null, block.getBaseOffset(), DATA.length); + BinaryView v = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length); + assertTrue(v.isOffHeap()); + assertNull(v.getBaseObject()); + assertEquals(DATA.length, v.numBytes()); + // getBytes() on an off-heap view must materialize a new array. + byte[] materialized = v.getBytes(); + assertArrayEquals(DATA, materialized); + // copy() materializes to an on-heap, tight owner. 
+ BinaryView c = v.copy(); + assertFalse(c.isOffHeap()); + assertArrayEquals(DATA, c.getBytes()); + } finally { + MemoryAllocator.UNSAFE.free(block); + } + } + + @Test + public void equalsAcrossHeapAndOffHeap() { + MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length); + try { + Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET, + null, block.getBaseOffset(), DATA.length); + BinaryView heap = BinaryView.fromBytes(DATA); + BinaryView offHeap = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length); + assertEquals(heap, offHeap); + assertEquals(heap.hashCode(), offHeap.hashCode()); + } finally { + MemoryAllocator.UNSAFE.free(block); + } + } + + @Test + public void compareToIsUnsignedLexicographic() { + BinaryView a = BinaryView.fromBytes(new byte[] { 1, 2, 3 }); + BinaryView b = BinaryView.fromBytes(new byte[] { 1, 2, 4 }); + BinaryView c = BinaryView.fromBytes(new byte[] { 1, 2, 3, 0 }); + BinaryView neg = BinaryView.fromBytes(new byte[] { (byte) 0x80 }); + BinaryView pos = BinaryView.fromBytes(new byte[] { 0x7F }); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(a) > 0); + assertTrue(a.compareTo(c) < 0); // shorter prefix is less + assertEquals(0, a.compareTo(BinaryView.fromBytes(new byte[] { 1, 2, 3 }))); + // Unsigned byte comparison: 0x80 > 0x7F. 
+ assertTrue(neg.compareTo(pos) > 0); + } + + @Test + public void byteBufferRoundTripHeap() { + BinaryView v = BinaryView.fromBytes(DATA); + ByteBuffer bb = v.toByteBuffer(); + assertTrue(bb.hasArray()); + assertEquals(DATA.length, bb.remaining()); + byte[] out = new byte[DATA.length]; + bb.get(out); + assertArrayEquals(DATA, out); + } + + @Test + public void byteBufferOffHeapMaterializes() { + MemoryBlock block = MemoryAllocator.UNSAFE.allocate(DATA.length); + try { + Platform.copyMemory(DATA, Platform.BYTE_ARRAY_OFFSET, + null, block.getBaseOffset(), DATA.length); + BinaryView v = BinaryView.fromAddress(null, block.getBaseOffset(), DATA.length); + ByteBuffer bb = v.toByteBuffer(); + // For off-heap, toByteBuffer materializes into a fresh on-heap array. + assertTrue(bb.hasArray()); + byte[] out = new byte[DATA.length]; + bb.get(out); + assertArrayEquals(DATA, out); + } finally { + MemoryAllocator.UNSAFE.free(block); + } + } + + @Test + public void writeToMemoryRoundTrip() { + BinaryView v = BinaryView.fromBytes(DATA); + byte[] target = new byte[DATA.length + 4]; + v.writeToMemory(target, Platform.BYTE_ARRAY_OFFSET + 2); + for (int i = 0; i < DATA.length; i++) { + assertEquals(DATA[i], target[i + 2]); + } + } + + @Test + public void javaSerializationRoundTrip() throws Exception { + // Serialize a view that points at a sub-range of a larger array; deserialized value + // must own a tight backing array containing only the visible bytes. 
+ BinaryView v = BinaryView.fromBytes(DATA, 2, 4); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (ObjectOutputStream out = new ObjectOutputStream(baos)) { + out.writeObject(v); + } + BinaryView read; + try (ObjectInputStream in = + new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) { + read = (BinaryView) in.readObject(); + } + assertEquals(4, read.numBytes()); + assertArrayEquals(new byte[] { 30, 40, 50, 60 }, read.getBytes()); + assertFalse(read.isOffHeap()); + assertEquals(v, read); + } + + @Test + public void kryoSerializationRoundTrip() { + Kryo kryo = new Kryo(); + kryo.register(BinaryView.class); + BinaryView v = BinaryView.fromBytes(DATA, 3, 5); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (Output out = new Output(baos)) { + kryo.writeObject(out, v); + } + BinaryView read; + try (Input in = new Input(new ByteArrayInputStream(baos.toByteArray()))) { + read = kryo.readObject(in, BinaryView.class); + } + assertEquals(5, read.numBytes()); + assertArrayEquals(new byte[] { 40, 50, 60, 70, 80 }, read.getBytes()); + assertEquals(v, read); + } + +} diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java deleted file mode 100644 index 639a8b2f77821..0000000000000 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.unsafe.types; - -import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; - -public class GeographyValSuite { - - @Test - public void roundTripBytes() { - // A simple byte array to test the round trip (`fromBytes` -> `getBytes`). - byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 }; - GeographyVal geographyVal = GeographyVal.fromBytes(bytes); - assertNotNull(geographyVal); - assertArrayEquals(bytes, geographyVal.getBytes()); - } - - @Test - public void roundNullHandling() { - // A simple null byte array to test null handling for GEOGRAPHY. - byte[] bytes = null; - GeographyVal geographyVal = GeographyVal.fromBytes(bytes); - assertNull(geographyVal); - } - - @Test - public void testCompareTo() { - // Comparison is not yet supported for GEOGRAPHY. 
- byte[] bytes1 = new byte[] { 1, 2, 3 }; - byte[] bytes2 = new byte[] { 4, 5, 6 }; - GeographyVal geographyVal1 = GeographyVal.fromBytes(bytes1); - GeographyVal geographyVal2 = GeographyVal.fromBytes(bytes2); - try { - geographyVal1.compareTo(geographyVal2); - } catch (UnsupportedOperationException e) { - assert(e.toString().equals("java.lang.UnsupportedOperationException")); - } - } -} diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java deleted file mode 100644 index e38c6903e6ddc..0000000000000 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.unsafe.types; - -import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; - -public class GeometryValSuite { - - @Test - public void roundTripBytes() { - // A simple byte array to test the round trip (`fromBytes` -> `getBytes`). 
- byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 }; - GeometryVal geometryVal = GeometryVal.fromBytes(bytes); - assertNotNull(geometryVal); - assertArrayEquals(bytes, geometryVal.getBytes()); - } - - @Test - public void roundNullHandling() { - // A simple null byte array to test null handling for GEOMETRY. - byte[] bytes = null; - GeometryVal geometryVal = GeometryVal.fromBytes(bytes); - assertNull(geometryVal); - } - - @Test - public void testCompareTo() { - // Comparison is not yet supported for GEOMETRY. - byte[] bytes1 = new byte[] { 1, 2, 3 }; - byte[] bytes2 = new byte[] { 4, 5, 6 }; - GeometryVal geometryVal1 = GeometryVal.fromBytes(bytes1); - GeometryVal geometryVal2 = GeometryVal.fromBytes(bytes2); - try { - geometryVal1.compareTo(geometryVal2); - } catch (UnsupportedOperationException e) { - assert(e.toString().equals("java.lang.UnsupportedOperationException")); - } - } -} diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala index ddf588b6c64c7..87f1d0a1c75f3 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala @@ -300,6 +300,37 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig }) } + test("test concurrent comparator, sortKeyFunction, and getCollator on ICU collations") { + // Thread-local collator instances avoid lock contention on ICU's internal collation buffer. + // This test verifies correctness under concurrent access for all three paths: + // comparator, sortKeyFunction, and getCollator(). 
+ val collationNames = Seq("UNICODE", "en", "de", "en_CI", "en_AI") + collationNames.foreach { name => + val collation = fetchCollation(name) + val s1 = toUTF8("apple") + val s2 = toUTF8("banana") + val expectedCmp = collation.comparator.compare(s1, s2) + val expectedKey = collation.sortKeyFunction.apply(s1).asInstanceOf[Array[Byte]] + val expectedCollatorKey = + collation.getCollator.getCollationKey(s1.toValidString()).toByteArray + + (0 to 5).foreach(_ => { + IntStream.rangeClosed(0, 200).parallel().forEach { _ => + val cmp = collation.comparator.compare(s1, s2) + assert(cmp == expectedCmp, + s"Comparator returned inconsistent result for $name") + val key = collation.sortKeyFunction.apply(s1).asInstanceOf[Array[Byte]] + assert(java.util.Arrays.equals(key, expectedKey), + s"sortKeyFunction returned inconsistent result for $name") + val collatorKey = + collation.getCollator.getCollationKey(s1.toValidString()).toByteArray + assert(java.util.Arrays.equals(collatorKey, expectedCollatorKey), + s"getCollator().getCollationKey() returned inconsistent result for $name") + } + }) + } + } + test("test collation caching") { Seq( "UTF8_BINARY", diff --git a/common/utils-java/pom.xml b/common/utils-java/pom.xml index cd06b89da9939..433bffd7e405e 100644 --- a/common/utils-java/pom.xml +++ b/common/utils-java/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java b/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java index d8238912aec63..e92ef6f462a3f 100644 --- a/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java +++ b/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java @@ -331,6 +331,10 @@ public enum LogKeys implements LogKey { LABEL_COLUMN, LARGEST_CLUSTER_INDEX, LAST_ACCESS_TIME, + LAST_ATTEMPT_ACC_INVALIDATE, + LAST_ATTEMPT_ACC_SYSTEM_METRIC, + LAST_ATTEMPT_ACC_UNEXPECTED_REASON, 
+ LAST_ATTEMPT_ACC_USER_METRIC, LAST_COMMITTED_CHECKPOINT_ID, LAST_COMMIT_BASED_CHECKPOINT_ID, LAST_SCAN_TIME, @@ -452,6 +456,7 @@ public enum LogKeys implements LogKey { NUM_BYTES_USED, NUM_CATEGORIES, NUM_CHECKSUM_FILE, + NUM_CHILDREN, NUM_CHUNKS, NUM_CLASSES, NUM_COEFFICIENTS, diff --git a/common/utils/pom.xml b/common/utils/pom.xml index be2447b3ec4e4..296c30a6d25f7 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 7c5017b97f0db..926019df1e74f 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -191,6 +191,129 @@ ], "sqlState" : "0A000" }, + "AUTOCDC_BOTH_COLUMN_LIST_AND_EXCEPT_COLUMN_LIST" : { + "message" : [ + "AutoCDC flow specifies both `column_list` and `except_column_list`; at most one may be provided." + ], + "sqlState" : "42613" + }, + "AUTOCDC_COLUMNS_NOT_FOUND_IN_SCHEMA" : { + "message" : [ + "Using column name comparison, the following columns are not present in the schema: . Available columns: ." + ], + "sqlState" : "42703" + }, + "AUTOCDC_EMPTY_KEYS" : { + "message" : [ + "AutoCDC requires at least one key column to identify rows, but received an empty key set." + ], + "sqlState" : "22023" + }, + "AUTOCDC_INVALID_STATE" : { + "message" : [ + "AutoCDC flow detected an invalid state:" + ], + "subClass" : { + "AUXILIARY_TABLE_KEY_COLUMN_MISSING" : { + "message" : [ + "The auxiliary table is missing key column that is recorded in its table property. The auxiliary table schema may be corrupted or have been modified externally. Perform a full refresh of the target table to recreate the auxiliary table." 
+ ] + }, + "AUXILIARY_TABLE_PROPERTY_MALFORMED" : { + "message" : [ + "The auxiliary table has a malformed property with raw value ''. The property must be a JSON array of strings (e.g. '[\"id\",\"region\"]'). The auxiliary table metadata may be corrupted or have been modified externally. Perform a full refresh of the target table to recreate the auxiliary table." + ] + }, + "AUXILIARY_TABLE_PROPERTY_MISSING" : { + "message" : [ + "The auxiliary table is missing the required table property; cannot validate AutoCDC key columns. The auxiliary table metadata may be corrupted or have been modified externally. Perform a full refresh of the target table to recreate the auxiliary table." + ] + }, + "KEY_SCHEMA_DRIFT" : { + "message" : [ + "The AutoCDC flow's current key columns do not match the keys recorded in the auxiliary table (recorded keys ). AutoCDC does not support changing key columns or their types across incremental pipeline runs. To change keys, perform a full refresh of the target table." + ] + } + }, + "sqlState" : "42000" + }, + "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA" : { + "message" : [ + "Using column name comparison, the AutoCDC key column `` is not present in the flow's selected source schema. AutoCDC requires every key column to be present in the source change-data feed and retained by any configured column selection." + ], + "sqlState" : "22023" + }, + "AUTOCDC_MICROBATCH_VALIDATION" : { + "message" : [ + "AutoCDC flow on table in batch failed microbatch validation." + ], + "subClass" : { + "NON_ORDERABLE_SEQUENCE" : { + "message" : [ + "The sequencing column has non-orderable type . The sequencing column must be of a type that supports ordering." + ] + }, + "NULL_KEY" : { + "message" : [ + "The microbatch contains rows with null values in the following key column(s): . All rows must have non-null values for every key column." + ] + }, + "NULL_SEQUENCE" : { + "message" : [ + "The microbatch contains row(s) with a null sequencing value. 
All rows must have a non-null sequencing value." + ] + } + }, + "sqlState" : "22000" + }, + "AUTOCDC_MISSING_SEQUENCE_BY" : { + "message" : [ + "AutoCDC flow is missing a required `sequence_by` expression. Specify a `sequence_by` column or expression that orders incoming change events." + ], + "sqlState" : "22023" + }, + "AUTOCDC_MISSING_SOURCE" : { + "message" : [ + "AutoCDC flow is missing a required `source` table name. Specify the name of the streaming source table the flow should read from." + ], + "sqlState" : "22023" + }, + "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER" : { + "message" : [ + "Expected a single column identifier; got the multi-part identifier (parts: )." + ], + "sqlState" : "22023" + }, + "AUTOCDC_MULTIPLE_FLOWS_TO_TARGET" : { + "message" : [ + "Invalid AutoCDC destination with multiple flows: . An AutoCDC target table must have exactly one flow writing to it." + ], + "sqlState" : "42000" + }, + "AUTOCDC_NON_COLUMN_IDENTIFIER" : { + "message" : [ + "Expected a column identifier; got the non-attribute expression ``. AutoCDC keys, sequence_by, column_list, and except_column_list must reference unqualified column names." + ], + "sqlState" : "22023" + }, + "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT" : { + "message" : [ + "The column `` in the schema collides with the reserved AutoCDC column name prefix `` (using column name comparison). Rename or remove the column." + ], + "sqlState" : "42710" + }, + "AUTOCDC_SCD2_NOT_SUPPORTED" : { + "message" : [ + "AutoCDC flows do not currently support SCD Type 2 transformations." + ], + "sqlState" : "0A000" + }, + "AUTOCDC_TARGET_DOES_NOT_SUPPORT_MERGE" : { + "message" : [ + "Cannot start AutoCDC flow: the target table (format: ) does not support row-level operations. AutoCDC requires a target backed by a connector that supports MERGE." 
+ ], + "sqlState" : "0A000" + }, "AVRO_CANNOT_WRITE_NULL_FIELD" : { "message" : [ "Cannot write null value for field defined as non-null Avro data type .", @@ -661,6 +784,29 @@ ], "sqlState" : "42P08" }, + "CHANGELOG_CONTRACT_VIOLATION" : { + "message" : [ + "The Change Data Capture (CDC) connector violated the `Changelog` contract at runtime." + ], + "subClass" : { + "NULL_COMMIT_TIMESTAMP" : { + "message" : [ + "Connector emitted a row with a NULL `_commit_timestamp` on a streaming read engaging post-processing. The `Changelog` contract requires `_commit_timestamp` to be non-NULL for streaming reads, since post-processing uses it as event time to advance the watermark." + ] + }, + "UNEXPECTED_CHANGE_TYPE" : { + "message" : [ + "Connector emitted a row with a `_change_type` value that is not one of the four supported types (`insert`, `delete`, `update_preimage`, `update_postimage`). The `Changelog` contract requires every emitted row to carry one of these four values." + ] + }, + "UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION" : { + "message" : [ + "Connector emitted multiple delete or insert rows for the same `(rowId, _commit_version)` partition. The `Changelog` contract requires at most one logical change per row identity per commit when `containsIntermediateChanges() = false`. Either fix the connector to deduplicate intermediate states, or set `containsIntermediateChanges() = true` and use `deduplicationMode = netChanges`." + ] + } + }, + "sqlState" : "XX000" + }, "CHECKPOINT_FILE_CHECKSUM_VERIFICATION_FAILED" : { "message" : [ "Checksum verification failed, the file may be corrupted. File: ", @@ -2785,6 +2931,12 @@ "" ] }, + "COLUMN_ID_MISMATCH" : { + "message" : [ + "Column IDs have changed:", + "" + ] + }, "METADATA_COLUMNS_MISMATCH" : { "message" : [ "Metadata columns have changed:", @@ -2968,6 +3120,13 @@ ], "sqlState" : "0A000" }, + "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED" : { + "message" : [ + "Table alias is not allowed with INSERT INTO ... 
REPLACE WHERE because the WHERE condition is evaluated against the target table directly.", + "Use INSERT INTO ... REPLACE ON if you need to reference the target table via an alias." + ], + "sqlState" : "42000" + }, "INSUFFICIENT_TABLE_PROPERTY" : { "message" : [ "Can't find table property:" @@ -3278,6 +3437,34 @@ "message" : [ "`startingVersion` is required when `endingVersion` is specified for CDC queries." ] + }, + "UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL" : { + "message" : [ + "`computeUpdates` cannot be used with `deduplicationMode=none` on connector `` because the connector emits copy-on-write carry-over pairs (`containsCarryoverRows()` returns true) that would be silently mislabeled as updates. Set `deduplicationMode` to `dropCarryovers` or `netChanges`." + ] + } + }, + "sqlState" : "42K03" + }, + "INVALID_CHANGELOG_SCHEMA" : { + "message" : [ + "The Change Data Capture (CDC) schema returned by connector is invalid." + ], + "subClass" : { + "INVALID_COLUMN_TYPE" : { + "message" : [ + "Column `` has type , expected ." + ] + }, + "MISSING_COLUMN" : { + "message" : [ + "Required column `` is missing." + ] + }, + "MISSING_ROW_ID" : { + "message" : [ + "Connector advertises one or more post-processing properties (`containsCarryoverRows`, `representsUpdateAsDeleteAndInsert`, `containsIntermediateChanges`) that require row identity, but `Changelog.rowId()` returned an empty array." + ] } }, "sqlState" : "42K03" @@ -3587,6 +3774,11 @@ "Flow returns an invalid relation type." ], "subClass" : { + "AUTOCDC_RELATION_FOR_TEMPORARY_VIEW" : { + "message" : [ + "AutoCDC flows must target a streaming table because their reconciliation semantics require a streaming-table sink, but the flow attempts to write an AutoCDC relation to the temporary view ." + ] + }, "BATCH_RELATION_FOR_STREAMING_TABLE" : { "message" : [ "Streaming tables may only be defined by streaming relations, but the flow attempts to write a batch relation to the streaming table . 
Consider using the STREAM operator in Spark-SQL to convert the batch relation into a streaming relation, or populating the streaming table with an append once-flow instead." @@ -4066,6 +4258,12 @@ }, "sqlState" : "KD002" }, + "INVALID_METRIC_VIEW_YAML" : { + "message" : [ + "Failed to parse metric view YAML: " + ], + "sqlState" : "42K0L" + }, "INVALID_NAME_IN_USE_COMMAND" : { "message" : [ "Invalid name '' in command. Reason: " @@ -5262,6 +5460,49 @@ ], "sqlState" : "0A000" }, + "NEAREST_BY_JOIN" : { + "message" : [ + "Invalid nearest-by join." + ], + "subClass" : { + "CROSS_JOIN_NOT_ENABLED" : { + "message" : [ + "Nearest-by join is implemented as a bounded cross-product internally and is therefore rejected when `spark.sql.crossJoin.enabled = false`. Set `spark.sql.crossJoin.enabled = true` to permit it, or rewrite the query without nearest-by." + ] + }, + "NON_ORDERABLE_RANKING_EXPRESSION" : { + "message" : [ + "The ranking expression of type is not orderable. Provide an expression that returns an orderable type, such as a numeric distance like abs(a.col - b.col) or a numeric similarity score." + ] + }, + "NUM_RESULTS_OUT_OF_RANGE" : { + "message" : [ + "The number of results must be between and . Update the literal in `APPROX NEAREST BY ...` (or `EXACT NEAREST BY ...`) to fall within that range." + ] + }, + "STREAMING_NOT_SUPPORTED" : { + "message" : [ + "Nearest-by join is not supported with streaming DataFrames/Datasets." + ] + }, + "UNSUPPORTED_DIRECTION" : { + "message" : [ + "Unsupported nearest-by join direction ''. Supported nearest-by join directions include: ." + ] + }, + "UNSUPPORTED_JOIN_TYPE" : { + "message" : [ + "Unsupported nearest-by join type . Supported types: ." + ] + }, + "UNSUPPORTED_MODE" : { + "message" : [ + "Unsupported nearest-by join mode ''. Supported modes include: ." + ] + } + }, + "sqlState" : "42604" + }, "NEGATIVE_SCALE_DISALLOWED" : { "message" : [ "Negative scale is not allowed: ''. Set the config to \"true\" to allow it." 
@@ -5922,6 +6163,12 @@ ], "sqlState" : "42836" }, + "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE" : { + "message" : [ + "Recursive file loading is not supported when the data source has explicit partition columns. Either remove the option \"recursiveFileLookup\", or read the data without supplying partition columns (for example, do not read a partitioned table)." + ], + "sqlState" : "0A000" + }, "RECURSIVE_PROTOBUF_SCHEMA" : { "message" : [ "Found recursive reference in Protobuf schema, which can not be processed by Spark by default: . try setting the option `recursive.fields.max.depth` 1 to 10. Going beyond 10 levels of recursion is not allowed." @@ -6798,6 +7045,11 @@ "Duplicate streaming source names detected: . Each streaming source must have a unique name." ] }, + "INVALID_SINK_NAME" : { + "message" : [ + "Invalid streaming sink name: ''. Sink names must only contain ASCII letters ('a'-'z', 'A'-'Z'), digits ('0'-'9'), and underscores ('_')." + ] + }, "INVALID_SOURCE_NAME" : { "message" : [ "Invalid streaming source name ''. Source names must only contain ASCII letters (a-z, A-Z), digits (0-9), and underscores (_)." @@ -6808,6 +7060,11 @@ "Streaming source naming is not supported. Source name '' was provided but the feature is disabled. Please enable the feature by setting spark.sql.streaming.queryEvolution.enableSourceEvolution to true." ] }, + "UNNAMED_STREAMING_SINKS_WITH_ENFORCEMENT" : { + "message" : [ + "Streaming sink must be named when spark.sql.streaming.queryEvolution.enableSinkEvolution is enabled. Use the name() method on DataStreamWriter to assign a name to the streaming sink." + ] + }, "UNNAMED_STREAMING_SOURCES_WITH_ENFORCEMENT" : { "message" : [ "All streaming sources must be named when spark.sql.streaming.queryEvolution.enableSourceEvolution is enabled. Unnamed sources found: . Use the name() method to assign names to all streaming sources." 
@@ -6921,6 +7178,12 @@ ], "sqlState" : "22023" }, + "ST_INVALID_ENDIANNESS_VALUE" : { + "message" : [ + "Endianness '' must be either 'NDR' (little-endian) or 'XDR' (big-endian)." + ], + "sqlState" : "22023" + }, "ST_INVALID_SRID_VALUE" : { "message" : [ "Invalid or unsupported SRID (spatial reference identifier) value: ." @@ -7786,6 +8049,11 @@ "Referencing a lateral column alias in window expression ." ] }, + "LATERAL_JOIN_NEAREST_BY" : { + "message" : [ + "LATERAL correlation with NEAREST BY clause." + ] + }, "LATERAL_JOIN_USING" : { "message" : [ "JOIN USING with LATERAL correlation." @@ -7973,6 +8241,26 @@ "Store backend is not supported by TransformWithState operator. Please use RocksDBStateStoreProvider." ] }, + "TABLESAMPLE_SYSTEM" : { + "message" : [ + "TABLESAMPLE SYSTEM is only supported by data sources that implement block-level sampling." + ] + }, + "TABLESAMPLE_SYSTEM_NO_SCAN" : { + "message" : [ + "TABLESAMPLE SYSTEM requires a direct reference to a data source table that supports block-level sampling. It cannot be applied to subqueries, views, or tables with intervening operations." + ] + }, + "TABLESAMPLE_SYSTEM_REPEATABLE" : { + "message" : [ + "TABLESAMPLE SYSTEM does not support the REPEATABLE clause. Use TABLESAMPLE BERNOULLI for repeatable sampling with a seed." + ] + }, + "TABLESAMPLE_SYSTEM_SAMPLE_METHOD" : { + "message" : [ + "TABLESAMPLE SYSTEM does not support sampling. Only PERCENT sampling is supported." + ] + }, "TABLE_OPERATION" : { "message" : [ "Table does not support . Please check the current catalog and namespace to make sure the qualified table name is expected, and also check the catalog implementation which is configured by \"spark.sql.catalog\"." @@ -8171,6 +8459,29 @@ }, "sqlState" : "0A000" }, + "UNSUPPORTED_SCHEMA_EVOLUTION" : { + "message" : [ + "Schema evolution is not supported for this write." + ], + "subClass" : { + "CREATE_TABLE" : { + "message" : [ + "Creating a new table does not support schema evolution." 
+ ] + }, + "REPLACE_TABLE" : { + "message" : [ + "Replacing a table does not support schema evolution." + ] + }, + "V1_TABLE" : { + "message" : [ + "Writes to V1 tables or V1 data sources do not support schema evolution." + ] + } + }, + "sqlState" : "0A000" + }, "UNSUPPORTED_SHOW_CREATE_TABLE" : { "message" : [ "Unsupported a SHOW CREATE TABLE command." @@ -8181,6 +8492,11 @@ "The table is a Spark data source table. Please use SHOW CREATE TABLE without AS SERDE instead." ] }, + "ON_METRIC_VIEW" : { + "message" : [ + "The command is not supported on a metric view ." + ] + }, "ON_TEMPORARY_VIEW" : { "message" : [ "The command is not supported on a temporary view ." diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 810bdabebb38a..3fac57dbe5dda 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -95,6 +95,11 @@ class LogEntry(messageWithContext: => MessageWithContext) { def message: String = cachedMessageWithContext.message def context: java.util.Map[String, String] = cachedMessageWithContext.context + + def +(other: LogEntry): LogEntry = { + val combined = cachedMessageWithContext + other.cachedMessageWithContext + new LogEntry(combined) + } } /** diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala index fa5f99a1aae25..0af0e0f6de457 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkTestUtils.scala @@ -53,8 +53,8 @@ private[spark] trait SparkTestUtils { Seq( "-classpath", classpathUrls - .map { - _.getFile + .map { u => + new File(u.toURI).getPath } .mkString(File.pathSeparator)) } else { @@ -123,7 +123,8 @@ private[spark] trait SparkTestUtils { val options = 
Seq("-d", classDir.getAbsolutePath) ++ ( if (classpathUrls.nonEmpty) { - Seq("-classpath", classpathUrls.map(_.getFile).mkString(File.pathSeparator)) + Seq("-classpath", + classpathUrls.map(u => new File(u.toURI).getPath).mkString(File.pathSeparator)) } else Seq.empty ) @@ -177,7 +178,7 @@ private[spark] trait SparkTestUtils { // on Windows to work around CMD's command-line length limit and by some build/CI // tools. Expand any such JARs before invoking scalac so the classpath is complete. val expandedClasspath = classpathUrls.flatMap(expandManifestClasspath) - val cpStr = expandedClasspath.map(_.getFile).mkString(File.pathSeparator) + val cpStr = expandedClasspath.map(u => new File(u.toURI).getPath).mkString(File.pathSeparator) val args = Array("-classpath", cpStr, "-d", classDir.getAbsolutePath) ++ sourceFiles.map(_.getAbsolutePath) @@ -216,7 +217,7 @@ private[spark] trait SparkTestUtils { * original URL unchanged. */ private[spark] def expandManifestClasspath(url: URL): Seq[URL] = { - val file = new File(url.getFile) + val file = new File(url.toURI) if (!file.exists() || !file.getName.endsWith(".jar")) return Seq(url) try { val jarFile = new JarFile(file) diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala index 8b2807a80dd10..791be198a111c 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkThreadUtils.scala @@ -22,7 +22,7 @@ import scala.concurrent.Awaitable import scala.concurrent.duration.Duration import scala.util.control.NonFatal -import org.apache.spark.SparkException +import org.apache.spark.{SparkException, SparkThrowable} private[spark] object SparkThreadUtils { // scalastyle:off awaitresult @@ -41,6 +41,14 @@ private[spark] object SparkThreadUtils { */ @throws(classOf[SparkException]) def awaitResult[T](awaitable: Awaitable[T], atMost: Duration): T = 
{ + awaitResult(awaitable, atMost, preserveSparkThrowable = false) + } + + @throws(classOf[SparkException]) + def awaitResult[T]( + awaitable: Awaitable[T], + atMost: Duration, + preserveSparkThrowable: Boolean): T = { try { awaitResultNoSparkExceptionConversion(awaitable, atMost) } catch { @@ -48,6 +56,15 @@ private[spark] object SparkThreadUtils { throw e.throwable // TimeoutException is thrown in the current thread, so there is no need to wrap // the exception. + // Re-throw exceptions that already carry a structured condition (SparkThrowable) + // to avoid wrapping them in a generic SparkException and losing the SQL state. + case st: Exception with SparkThrowable + if preserveSparkThrowable + && !st.isInstanceOf[TimeoutException] && st.getCondition != null => + // Attach the caller's stack trace so it's not lost when re-throwing from a worker thread. + st.addSuppressed( + new SparkException("Exception thrown in awaitResult", cause = null)) + throw st case NonFatal(t) if !t.isInstanceOf[TimeoutException] => throw new SparkException("Exception thrown in awaitResult: ", t) diff --git a/common/utils/src/test/scala/org/apache/spark/util/SparkTestUtilsSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/SparkTestUtilsSuite.scala new file mode 100644 index 0000000000000..10a599739f6cf --- /dev/null +++ b/common/utils/src/test/scala/org/apache/spark/util/SparkTestUtilsSuite.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite + +class SparkTestUtilsSuite extends AnyFunSuite with SparkTestUtils { // scalastyle:ignore funsuite + + test("SPARK-57081: createCompiledClass with spaces in classpath") { + val dir = SparkFileUtils.createTempDir(namePrefix = "path with spaces") + val sourceFile = new JavaSourceFromString("Hello", "public class Hello {}") + val result = createCompiledClass("Hello", dir, sourceFile, Seq(dir.toURI.toURL)) + assert(result.exists(), s"Compiled class file should exist at ${result.getPath}") + } +} diff --git a/common/variant/pom.xml b/common/variant/pom.xml index a2bc7040e23c9..2ddd78eb7f17d 100644 --- a/common/variant/pom.xml +++ b/common/variant/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java index 1bd008a5c9149..aaf6f72bd5364 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java @@ -43,7 +43,12 @@ */ public class VariantBuilder { public VariantBuilder(boolean allowDuplicateKeys) { + this(allowDuplicateKeys, true); + } + + public VariantBuilder(boolean allowDuplicateKeys, boolean validateUnicodeInJsonParsing) { this.allowDuplicateKeys = allowDuplicateKeys; + 
this.validateUnicodeInJsonParsing = validateUnicodeInJsonParsing; } /** @@ -53,18 +58,41 @@ public VariantBuilder(boolean allowDuplicateKeys) { * @throws IOException if any JSON parsing error happens. */ public static Variant parseJson(String json, boolean allowDuplicateKeys) throws IOException { + return parseJson(json, allowDuplicateKeys, true); + } + + /** + * Similar to {@link #parseJson(String, boolean)}, but additionally controls whether JSON + * string contents are validated to be well-formed Unicode (no unpaired UTF-16 surrogate code + * units). Strict validation is the default and matches RFC 8259 section 7. The flag exists + * to allow callers to opt out for backward compatibility with input that previously parsed + * (with the unpaired surrogate silently replaced by the Unicode replacement character). + */ + public static Variant parseJson(String json, boolean allowDuplicateKeys, + boolean validateUnicodeInJsonParsing) throws IOException { try (JsonParser parser = new JsonFactory().createParser(json)) { parser.nextToken(); - return parseJson(parser, allowDuplicateKeys); + return parseJson(parser, allowDuplicateKeys, validateUnicodeInJsonParsing); } } /** - * Similar {@link #parseJson(String, boolean)}, but takes a JSON parser instead of string input. + * Similar to {@link #parseJson(String, boolean)}, but takes a JSON parser instead of string + * input. */ public static Variant parseJson(JsonParser parser, boolean allowDuplicateKeys) throws IOException { - VariantBuilder builder = new VariantBuilder(allowDuplicateKeys); + return parseJson(parser, allowDuplicateKeys, true); + } + + /** + * Similar to {@link #parseJson(JsonParser, boolean)}, but additionally controls whether JSON + * string contents are validated to be well-formed Unicode. See + * {@link #parseJson(String, boolean, boolean)}. 
+ */ + public static Variant parseJson(JsonParser parser, boolean allowDuplicateKeys, + boolean validateUnicodeInJsonParsing) throws IOException { + VariantBuilder builder = new VariantBuilder(allowDuplicateKeys, validateUnicodeInJsonParsing); builder.buildJson(parser); return builder.result(); } @@ -495,6 +523,9 @@ private void buildJson(JsonParser parser) throws IOException { int start = writePos; while (parser.nextToken() != JsonToken.END_OBJECT) { String key = parser.currentName(); + if (validateUnicodeInJsonParsing) { + checkValidUnicodeString(key, parser); + } parser.nextToken(); int id = addKey(key); fields.add(new FieldEntry(key, id, writePos - start)); @@ -513,9 +544,14 @@ private void buildJson(JsonParser parser) throws IOException { finishWritingArray(start, offsets); break; } - case VALUE_STRING: - appendString(parser.getText()); + case VALUE_STRING: { + String text = parser.getText(); + if (validateUnicodeInJsonParsing) { + checkValidUnicodeString(text, parser); + } + appendString(text); break; + } case VALUE_NUMBER_INT: try { appendLong(parser.getLongValue()); @@ -557,6 +593,30 @@ private void parseFloatingPoint(JsonParser parser) throws IOException { } } + // Reject JSON strings that contain unpaired UTF-16 surrogate code units. Java strings can + // hold lone surrogates, but RFC 8259 section 7 requires JSON string contents to be well-formed + // Unicode. Stricter parsers such as simdjson reject these inputs, while Jackson's + // `ReaderBasedJsonParser` accepts them and silently replaces the invalid character with U+FFFD + // when the result is encoded as UTF-8. That silent replacement causes data corruption, so + // we surface a JSON parse error instead. 
+ private static void checkValidUnicodeString(String str, JsonParser parser) + throws JsonParseException { + int len = str.length(); + for (int i = 0; i < len; ++i) { + char c = str.charAt(i); + if (Character.isHighSurrogate(c)) { + if (i + 1 >= len || !Character.isLowSurrogate(str.charAt(i + 1))) { + throw new JsonParseException(parser, String.format( + "Invalid Unicode in JSON string: lone high surrogate U+%04X", (int) c)); + } + ++i; + } else if (Character.isLowSurrogate(c)) { + throw new JsonParseException(parser, String.format( + "Invalid Unicode in JSON string: lone low surrogate U+%04X", (int) c)); + } + } + } + // Try to parse a JSON number as a decimal. Return whether the parsing succeeds. The input must // only use the decimal format (an integer value with an optional '.' in it) and must not use // scientific notation. It also must fit into the precision limitation of decimal types. @@ -583,4 +643,8 @@ private boolean tryParseDecimal(String input) { // Store all keys in `dictionary` in the order of id. private final ArrayList dictionaryKeys = new ArrayList<>(); private final boolean allowDuplicateKeys; + // When true, JSON string contents are validated to be well-formed Unicode (RFC 8259 sec 7). + // Unpaired UTF-16 surrogate code units cause a `JsonParseException` to be thrown during + // `buildJson`, which surfaces as a `MALFORMED_RECORD_IN_PARSING` error to SQL callers. + private final boolean validateUnicodeInJsonParsing; } diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java index 795d46ec2062b..ac93246991c0e 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java @@ -428,7 +428,8 @@ private static void checkDecimal(BigDecimal d, int maxPrecision) { // Get a decimal value from variant value `value[pos...]`. 
// Throw `MALFORMED_VARIANT` if the variant is malformed. public static BigDecimal getDecimalWithOriginalScale(byte[] value, int pos) { - checkIndex(pos, value.length); + // Decimal should at least have header + scale. + checkIndex(pos + 1, value.length); int basicType = value[pos] & BASIC_TYPE_MASK; int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK; if (basicType != PRIMITIVE) throw unexpectedType(Type.DECIMAL); @@ -589,6 +590,92 @@ public static T handleArray(byte[] value, int pos, ArrayHandler handler) return handler.apply(size, offsetSize, offsetStart, dataStart); } + // Validate whether a variant is well-formed. Returns true if the variant binary is structurally + // well-formed (all bounds and type-info checks pass), false if it is malformed. + // + // This is close to, but not strictly equivalent to, "`toJson` does not throw": this function + // does not enforce the `SIZE_LIMIT` check that the `Variant` constructor applies (which throws + // `VARIANT_CONSTRUCTOR_SIZE_LIMIT`). The implementation otherwise has the same structure as + // `toJson` (see `Variant.toJsonImpl`). + // + // Implementation note: this `try { ... } catch (SparkRuntimeException e)` is sound only because + // every helper invoked by `validateImpl` throws `MALFORMED_VARIANT` / + // `UNKNOWN_PRIMITIVE_TYPE_IN_VARIANT` rather than a raw `ArrayIndexOutOfBoundsException` on + // malformed input. Preserve that invariant when adding new cases. + public static boolean isValidVariant(byte[] value, byte[] metadata) { + if (value == null || metadata == null) return false; + // Validate the metadata version, similar to the check in the `Variant` constructor. 
+ if (metadata.length < 1 || (metadata[0] & VERSION_MASK) != VERSION) return false; + try { + validateImpl(value, metadata, 0); + return true; + } catch (SparkRuntimeException e) { + return false; + } + } + + private static void validateImpl(byte[] value, byte[] metadata, int pos) { + switch (getType(value, pos)) { + case OBJECT: + handleObject(value, pos, (size, idSize, offsetSize, idStart, offsetStart, dataStart) -> { + for (int i = 0; i < size; ++i) { + int id = readUnsigned(value, idStart + idSize * i, idSize); + int offset = readUnsigned(value, offsetStart + offsetSize * i, offsetSize); + int elementPos = dataStart + offset; + getMetadataKey(metadata, id); + validateImpl(value, metadata, elementPos); + } + return null; + }); + break; + case ARRAY: + handleArray(value, pos, (size, offsetSize, offsetStart, dataStart) -> { + for (int i = 0; i < size; ++i) { + int offset = readUnsigned(value, offsetStart + offsetSize * i, offsetSize); + int elementPos = dataStart + offset; + validateImpl(value, metadata, elementPos); + } + return null; + }); + break; + case NULL: + break; + case BOOLEAN: + getBoolean(value, pos); + break; + case LONG: + getLong(value, pos); + break; + case STRING: + getString(value, pos); + break; + case DOUBLE: + getDouble(value, pos); + break; + case DECIMAL: + getDecimal(value, pos); + break; + case DATE: + case TIMESTAMP: + case TIMESTAMP_NTZ: + getLong(value, pos); + break; + case FLOAT: + getFloat(value, pos); + break; + case BINARY: + getBinary(value, pos); + break; + case UUID: + getUuid(value, pos); + break; + default: + // This is practically unreachable because we handle all possible types. It only + // intends to ensure we don't forget adding a new case when adding a new type. + throw malformedVariant(); + } + } + // Get a key at `id` in the variant metadata. // Throw `MALFORMED_VARIANT` if the variant is malformed. An out-of-bound `id` is also considered // a malformed variant because it is read from the corresponding variant value. 
diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index e4148ce906b83..b4f5cd72f551d 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 92487f11cc165..66022cab7c77b 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index faeb39108c4f9..d57d3aa5ea03e 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -244,6 +244,8 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT override def supportsTableSample: Boolean = true + override def supportsTableSampleSystem: Boolean = true + override def supportsIndex: Boolean = true override def indexOptions: String = "FILLFACTOR=70" diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index df5dfdf7deafb..79366189c20db 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -148,7 +148,7 @@ private[v2] trait V2JDBCTest partitionColumn: String) val tableNameToPartinioningOptions: Map[String, 
PartitioningInfo] = Map( "employee" -> PartitioningInfo("4", "1", "8", "dept"), - // new_table is used in "SPARK-37038: Test TABLESAMPLE" test + // new_table is used in "SPARK-37038,SPARK-57040: Test TABLESAMPLE" test "new_table" -> PartitioningInfo("4", "1", "20", "col1") ) @@ -470,6 +470,8 @@ private[v2] trait V2JDBCTest def supportsTableSample: Boolean = false + def supportsTableSampleSystem: Boolean = false + test("SPARK-48172: Test CONTAINS") { val df1 = spark.sql( s""" @@ -699,9 +701,20 @@ private[v2] trait V2JDBCTest assert(rows12(5).getString(0) === "special_character_underscorenot_present") } + test("SPARK-57040: TABLESAMPLE with replacement is not pushed down") { + withTable(s"$catalogName.new_table") { + sql(s"CREATE TABLE $catalogName.new_table (col1 INT, col2 INT)") + spark.range(10).select($"id" * 2, $"id" * 2 + 1).write.insertInto(s"$catalogName.new_table") + val df = spark.read.table(s"$catalogName.new_table") + .sample(withReplacement = true, fraction = 0.5, seed = 12345) + checkSamplePushed(df, false) + assert(df.collect().length > 0) + } + } + val partitioningEnabledTestCase = Seq(true, false) gridTest( - "SPARK-37038: Test TABLESAMPLE" + "SPARK-37038,SPARK-57040: Test TABLESAMPLE" )(partitioningEnabledTestCase) { partitioningEnabled => if (supportsTableSample) { withTable(s"$catalogName.new_table") { @@ -789,6 +802,27 @@ private[v2] trait V2JDBCTest checkSamplePushed(df8, false) checkFilterPushed(df8) assert(df8.collect().length < 10) + + // SYSTEM sampling pushdown + if (supportsTableSampleSystem) { + val df9 = sql(s"SELECT * FROM $catalogName.new_table $tableOptions " + + "TABLESAMPLE SYSTEM (50 PERCENT)") + checkSamplePushed(df9) + if (partitioningEnabled) { + multiplePartitionAdditionalCheck(df1, partitionInfo) + } + assert(df9.collect().length <= 10) + + // SYSTEM sampling + column pruning + val df10 = sql(s"SELECT col1 FROM $catalogName.new_table $tableOptions " + + "TABLESAMPLE SYSTEM (50 PERCENT)") + checkSamplePushed(df10) + 
checkColumnPruned(df10, "col1") + if (partitioningEnabled) { + multiplePartitionAdditionalCheck(df1, partitionInfo) + } + assert(df10.collect().length <= 10) + } } } } diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index fbea96ec6ce7e..b86c94f3e35af 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index b21810d055fc4..4980e94c45776 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index 7c3a7f4bd0fde..5c471db25becb 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 7da9bc31cb640..a7b5b06a6ff58 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 97df4301edffd..c73a0015c416e 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index c4708b5489c51..c24bd4886e770 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark 
spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml index 99e06e9bf8029..93572d6d671d3 100644 --- a/connector/profiler/pom.xml +++ b/connector/profiler/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 4f6e826c7b8ba..e9521f9418c1f 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index 57138b71b06e6..7b18a97cbd9de 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index f0ae6a4184ca2..6b228a86f3535 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index a40dab8a8dab7..2a3678a6b94da 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -594,28 +594,8 @@ public UnsafeSorterIterator getSortedIterator() throws IOException { logger.info("Merging {} spill files using bounded merge with factor {}", MDC.of(LogKeys.NUM_SPILL_WRITERS, spillWriters.size()), MDC.of(LogKeys.MERGE_FACTOR, spillMergeFactor)); - - // This assignment is not inside synchronized(this), unlike the read in - // cleanupResources(). 
That is safe because all callers of cleanupResources() - // (the task completion listener, iterator-end cleanup from wrappers like - // UnsafeExternalRowSorter / UnsafeKVExternalSorter / SortExec, etc.) run on - // the task thread, sequentially with getSortedIterator(). The volatile modifier - // on boundedMerger provides memory visibility across any intervening - // synchronized blocks. - boundedMerger = new UnsafeSorterBoundedSpillMerger( - spillMergeFactor, - recordComparatorSupplier.get(), - prefixComparator, - blockManager, - serializerManager, - fileBufferSizeBytes); - - UnsafeSorterIterator inMemIter = null; - if (inMemSorter != null) { - readingIterator = new SpillableIterator(inMemSorter.getSortedIterator()); - inMemIter = readingIterator; - } - return boundedMerger.merge(spillWriters, inMemIter); + BoundedMergerContext ctx = prepareBoundedMerge(); + return ctx.merger.merge(ctx.snapshot, ctx.inMemIter); } else { // Original single-round merge: open all spill readers at once logger.info("Merging {} spill files in single round", @@ -633,6 +613,60 @@ public UnsafeSorterIterator getSortedIterator() throws IOException { } } + @VisibleForTesting + static final class BoundedMergerContext { + final List snapshot; + @Nullable final SpillableIterator inMemIter; + final UnsafeSorterBoundedSpillMerger merger; + + BoundedMergerContext( + List snapshot, + @Nullable SpillableIterator inMemIter, + UnsafeSorterBoundedSpillMerger merger) { + this.snapshot = snapshot; + this.inMemIter = inMemIter; + this.merger = merger; + } + } + + @VisibleForTesting + BoundedMergerContext prepareBoundedMerge() { + // Snapshot MUST precede readingIterator publication. Once readingIterator is + // non-null, a sibling MemoryConsumer's spill request is routed via + // readingIterator.spill(), which appends a new writer to spillWriters AND rebinds + // readingIterator.upstream to that same file. 
A post-publication snapshot would + // then feed that file to BOTH the snapshot path and readingIterator -- duplicate + // records in the merged output. List.copyOf returns an unmodifiable list so any + // future code that mutates the snapshot (or aliases the live spillWriters field + // into the context and adds to it) fails fast. + final List snapshot = List.copyOf(spillWriters); + + // The volatile fields published below -- boundedMerger and readingIterator -- are + // written without holding synchronized(this). Safe because all callers of + // getSortedIterator() and cleanupResources() (the task completion listener, + // iterator-end cleanup from wrappers like UnsafeExternalRowSorter / + // UnsafeKVExternalSorter / SortExec, etc.) run on the task thread, sequentially. + // The volatile modifier provides memory visibility to off-task-thread readers: + // sibling MemoryConsumer.spill() reads readingIterator, and cleanupResources()'s + // synchronized(this) read of boundedMerger crosses any intervening synchronized + // blocks. 
+ final UnsafeSorterBoundedSpillMerger merger = new UnsafeSorterBoundedSpillMerger( + spillMergeFactor, + recordComparatorSupplier.get(), + prefixComparator, + blockManager, + serializerManager, + fileBufferSizeBytes); + boundedMerger = merger; + + SpillableIterator inMemIter = null; + if (inMemSorter != null) { + readingIterator = new SpillableIterator(inMemSorter.getSortedIterator()); + inMemIter = readingIterator; + } + return new BoundedMergerContext(snapshot, inMemIter, merger); + } + @VisibleForTesting boolean hasSpaceForAnotherRecord() { return inMemSorter.hasSpaceForAnotherRecord(); } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java index 1f389465a8b21..b844f9816bf3c 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterBoundedSpillMerger.java @@ -90,6 +90,11 @@ final class UnsafeSorterBoundedSpillMerger { *

<p>If {@code inMemIterator} is non-null, it is included in the final merge round * (not spilled to disk in intermediate rounds).</p>

* + *

<p>This method does not mutate the input {@code spillWriters} list; intermediate + * rounds reassign a local variable to fresh lists. Callers are still responsible for + * passing a defensive snapshot if they need to protect against concurrent mutation + * of the underlying list (see {@link UnsafeExternalSorter#prepareBoundedMerge}).</p>

+ * * @param spillWriters the list of spill writers to merge * @param inMemIterator optional in-memory sorted iterator to include in the final merge * @return a sorted iterator over all records @@ -98,7 +103,7 @@ public UnsafeSorterIterator merge( List spillWriters, @Nullable UnsafeSorterIterator inMemIterator) throws IOException { - List spillsToMerge = new ArrayList<>(spillWriters); + List spillsToMerge = spillWriters; int round = 0; while (spillsToMerge.size() > mergeFactor) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css index 202579c6b67ce..e7a8f3ab0839a 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css @@ -58,4 +58,36 @@ table.dataTable thead .sorting_desc_disabled::after { div.dataTables_wrapper div.dataTables_length select { width: 100%; +} + +/* SQL tab sub-execution disclosure (SPARK-56811) */ +table#sql-table td.sub-exec-toggle { + white-space: nowrap; +} + +table#sql-table td.sub-exec-toggle a.toggle-sub-exec { + text-decoration: none; +} + +table#sql-table td.sub-exec-toggle a.toggle-sub-exec:hover { + text-decoration: underline; +} + +table#sql-table tr.shown td.sub-exec-toggle a.toggle-sub-exec { + font-weight: 600; +} + +table#sql-table tr.shown + tr > td { + background-color: var(--bs-tertiary-bg, #f4f7fa); +} + +table.sub-exec-table { + margin-left: 1.5rem !important; + width: calc(100% - 1.5rem) !important; + background-color: transparent; +} + +table.sub-exec-table thead th { + font-weight: 600; + background-color: transparent; } \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index fad9bb522ad92..0262144490ce8 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ 
b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -3152,6 +3152,8 @@ object SparkContext extends Logging { private[spark] val RDD_SCOPE_KEY = "spark.rdd.scope" private[spark] val RDD_SCOPE_NO_OVERRIDE_KEY = "spark.rdd.scope.noOverride" private[spark] val SQL_EXECUTION_ID_KEY = "spark.sql.execution.id" + private[spark] val DATASET_QUERY_EXECUTION_ID_KEY = + "spark.sql.dataset.queryExecution.id" /** * Executor id for the driver. In earlier versions of Spark, this was ``, but this was diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala index 8cd95ee653ebe..7c704c3d2b37e 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackendAuthHandler.scala @@ -19,6 +19,7 @@ package org.apache.spark.api.r import java.io.{ByteArrayOutputStream, DataOutputStream} import java.nio.charset.StandardCharsets.UTF_8 +import java.security.MessageDigest import io.netty.channel.{Channel, ChannelHandlerContext, SimpleChannelInboundHandler} @@ -34,7 +35,8 @@ private class RBackendAuthHandler(secret: String) // The R code adds a null terminator to serialized strings, so ignore it here. 
val clientSecret = new String(msg, 0, msg.length - 1, UTF_8) try { - require(secret == clientSecret, "Auth secret mismatch.") + require(MessageDigest.isEqual(secret.getBytes(UTF_8), clientSecret.getBytes(UTF_8)), + "Auth secret mismatch.") ctx.pipeline().remove(this) writeReply("ok", ctx.channel()) } catch { diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 04302c77a3984..2a3fd0d004e11 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -20,10 +20,12 @@ package org.apache.spark.deploy import org.json4s.JsonAST._ import org.json4s.JsonDSL._ +import org.apache.spark.SparkConf import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master._ import org.apache.spark.deploy.worker.ExecutorRunner import org.apache.spark.resource.{ResourceInformation, ResourceRequirement} +import org.apache.spark.util.Utils private[deploy] object JsonProtocol { @@ -123,10 +125,16 @@ private[deploy] object JsonProtocol { * `memoryperexecutor` minimal memory in MB required to each executor * `resourcesperexecutor` minimal resources required to each executor * `user` name of the user who submitted the application - * `command` the command string used to submit the application + * `command` the command string used to submit the application, with secret-bearing + * fields (`environment`, `javaOpts`) redacted using `spark.redaction.regex` * For compatibility also returns the deprecated `memoryperslave` & `resourcesperslave` fields. 
*/ - def writeApplicationDescription(obj: ApplicationDescription): JObject = { + def writeApplicationDescription(obj: ApplicationDescription, conf: SparkConf): JObject = { + val redactedEnvironment = Utils.redact(conf, obj.command.environment.toSeq).toMap + val redactedJavaOpts = Utils.redactCommandLineArgs(conf, obj.command.javaOpts) + val redactedCommand = obj.command.copy( + environment = redactedEnvironment, + javaOpts = redactedJavaOpts) ("name" -> obj.name) ~ ("cores" -> obj.maxCores.getOrElse(0)) ~ ("memoryperexecutor" -> obj.memoryPerExecutorMB) ~ @@ -134,7 +142,7 @@ private[deploy] object JsonProtocol { ("memoryperslave" -> obj.memoryPerExecutorMB) ~ ("resourcesperslave" -> obj.resourceReqsPerExecutor.toList.map(writeResourceRequirement)) ~ ("user" -> obj.user) ~ - ("command" -> obj.command.toString) + ("command" -> redactedCommand.toString) } /** @@ -154,7 +162,7 @@ private[deploy] object JsonProtocol { ("memory" -> obj.memory) ~ ("resources" -> writeResourcesInfo(obj.resources)) ~ ("appid" -> obj.appId) ~ - ("appdesc" -> writeApplicationDescription(obj.appDesc)) + ("appdesc" -> writeApplicationDescription(obj.appDesc, obj.conf)) } /** diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 8f0b684a93e81..5bfb486b0e606 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -54,7 +54,7 @@ private[deploy] class ExecutorRunner( val sparkHome: File, val executorDir: File, val workerUrl: String, - conf: SparkConf, + val conf: SparkConf, val appLocalDirs: Seq[String], @volatile var state: ExecutorState.Value, val rpId: Int, diff --git a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala index 98b80317db982..8ecb14be1dfb8 100644 --- 
a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala @@ -39,6 +39,13 @@ private[spark] object Tests { .booleanConf .createOptional + val INJECT_SHUFFLE_FETCH_FAILURES = + ConfigBuilder("spark.testing.injectShuffleFetchFailures") + .doc("Injecting fetch failures for shuffle stages by providing an invalid BlockManager " + + "location for the first stage attempt. Testing only flag!") + .booleanConf + .createWithDefault(false) + val TEST_NO_STAGE_RETRY = ConfigBuilder("spark.test.noStageRetry") .version("1.2.0") .booleanConf diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index d7bda5bbe721a..3c045fcd95411 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -99,7 +99,7 @@ class HadoopMapReduceCommitProtocol( * e.g. a=1/b=2. Files under these partitions will be saved into staging directory and moved to * destination directory at the end, if `dynamicPartitionOverwrite` is true. */ - @transient private var partitionPaths: mutable.Set[String] = null + @transient protected var partitionPaths: mutable.Set[String] = null /** * The staging directory of this write job. 
Spark uses it to deal with files with absolute output diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala index 49c259999a471..675c44153cd4d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala @@ -130,6 +130,22 @@ private[spark] object RDDOperationScope extends Logging { name: String, allowNesting: Boolean, ignoreParent: Boolean)(body: => T): T = { + withScope(sc, name, allowNesting, ignoreParent, + nextScopeId().toString)(body) + } + + /** + * Execute the given body such that all RDDs created in this body + * will have the same scope, with an explicit scope ID. + * + * Note: Return statements are NOT allowed in body. + */ + private[spark] def withScope[T]( + sc: SparkContext, + name: String, + allowNesting: Boolean, + ignoreParent: Boolean, + rddScopeId: String)(body: => T): T = { // Save the old scope to restore it later val scopeKey = SparkContext.RDD_SCOPE_KEY val noOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY @@ -139,10 +155,12 @@ private[spark] object RDDOperationScope extends Logging { try { if (ignoreParent) { // Ignore all parent settings and scopes and start afresh with our own root scope - sc.setLocalProperty(scopeKey, new RDDOperationScope(name).toJson) + sc.setLocalProperty(scopeKey, + new RDDOperationScope(name, None, rddScopeId).toJson) } else if (sc.getLocalProperty(noOverrideKey) == null) { // Otherwise, set the scope only if the higher level caller allows us to do so - sc.setLocalProperty(scopeKey, new RDDOperationScope(name, oldScope).toJson) + sc.setLocalProperty(scopeKey, + new RDDOperationScope(name, oldScope, rddScopeId).toJson) } // Optionally disallow the child body to override our scope if (!allowNesting) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala 
b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 5fbd160bc683b..22720b98aafde 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -325,6 +325,16 @@ private[spark] class DAGScheduler( private val messageScheduler = ThreadUtils.newDaemonSingleThreadScheduledExecutor("dag-scheduler-message") + private def scheduleResubmit(): Unit = { + messageScheduler.schedule( + new Runnable { + override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) + }, + DAGScheduler.RESUBMIT_TIMEOUT, + TimeUnit.MILLISECONDS + ) + } + private[spark] var eventProcessLoop = new DAGSchedulerEventProcessLoop(this) // Used for test only. Some tests uses the same thread of the event poster to // process the events to ensure the deterministic behavior during the test. @@ -1858,6 +1868,11 @@ private[spark] class DAGScheduler( throw SparkCoreErrors.accessNonExistentAccumulatorError(id) } acc.merge(updates.asInstanceOf[AccumulatorV2[Any, Any]]) + if (acc.isInstanceOf[LastAttemptAccumulator[_, _, _]]) { + acc.asInstanceOf[LastAttemptAccumulator[_, _, _]].mergeLastAttempt( + updates, stage.rdd, event.taskInfo, + task.stageId, task.stageAttemptId, task.localProperties) + } // To avoid UI cruft, ignore cases where value wasn't updated if (acc.name.isDefined && !updates.isZero) { stage.latestInfo.accumulables(id) = acc.toInfo(None, Some(acc.value)) @@ -2174,13 +2189,7 @@ private[spark] class DAGScheduler( if (noResubmitEnqueued) { logInfo(log"Resubmitting ${MDC(FAILED_STAGE, stage)} " + log"(${MDC(FAILED_STAGE_NAME, stage.name)}) due to rollback.") - messageScheduler.schedule( - new Runnable { - override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) - }, - DAGScheduler.RESUBMIT_TIMEOUT, - TimeUnit.MILLISECONDS - ) + scheduleResubmit() } } @@ -2333,6 +2342,19 @@ private[spark] class DAGScheduler( // The epoch of the task is acceptable (i.e., the task was 
launched after the most // recent failure we're aware of for the executor), so mark the task's output as // available. + // For testing purposes, inject fetch failures controlled from the driver-side by + // supplying an invalid location. + if (Utils.isTesting && + sc.conf.get(config.Tests.INJECT_SHUFFLE_FETCH_FAILURES) && + task.stageAttemptId == 0) { + val currentLocation = status.location + val invalidLocation = BlockManagerId( + execId = BlockManagerId.INVALID_EXECUTOR_ID, + host = currentLocation.host, + port = currentLocation.port, + topologyInfo = currentLocation.topologyInfo) + status.updateLocation(invalidLocation) + } val isChecksumMismatched = mapOutputTracker.registerMapOutput( shuffleStage.shuffleDep.shuffleId, smt.partitionId, status) if (isChecksumMismatched) { @@ -2492,13 +2514,7 @@ private[spark] class DAGScheduler( log"Resubmitting ${MDC(STAGE, mapStage)} " + log"(${MDC(STAGE_NAME, mapStage.name)}) and ${MDC(FAILED_STAGE, failedStage)} " + log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) due to fetch failure") - messageScheduler.schedule( - new Runnable { - override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) - }, - DAGScheduler.RESUBMIT_TIMEOUT, - TimeUnit.MILLISECONDS - ) + scheduleResubmit() } } @@ -2605,9 +2621,7 @@ private[spark] class DAGScheduler( if (noResubmitEnqueued) { logInfo(log"Resubmitting ${MDC(FAILED_STAGE, failedStage)} " + log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) due to barrier stage failure.") - messageScheduler.schedule(new Runnable { - override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) - }, DAGScheduler.RESUBMIT_TIMEOUT, TimeUnit.MILLISECONDS) + scheduleResubmit() } } } diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala index ecebb97ecfc1d..d2a81e56265c9 100644 --- a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala +++ 
b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala @@ -21,6 +21,7 @@ import java.io.{DataInputStream, DataOutputStream} import java.net.Socket import java.nio.channels.SocketChannel import java.nio.charset.StandardCharsets.UTF_8 +import java.security.MessageDigest import org.apache.spark.SparkConf import org.apache.spark.internal.config.Python.{PYTHON_UNIX_DOMAIN_SOCKET_DIR, PYTHON_UNIX_DOMAIN_SOCKET_ENABLED} @@ -65,7 +66,7 @@ private[spark] class SocketAuthHelper(val conf: SparkConf) { try { s.setSoTimeout(10000) val clientSecret = readUtf8(s) - if (secret == clientSecret) { + if (MessageDigest.isEqual(secret.getBytes(UTF_8), clientSecret.getBytes(UTF_8))) { writeUtf8("ok", s) shouldClose = false } else { diff --git a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala index 847d3ffff630a..78e137f53355c 100644 --- a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala @@ -218,7 +218,7 @@ private[spark] trait PagedTable[T] { } } -
+
Page:
    diff --git a/core/src/main/scala/org/apache/spark/util/LastAttemptAccumulator.scala b/core/src/main/scala/org/apache/spark/util/LastAttemptAccumulator.scala new file mode 100644 index 0000000000000..a262750a8cd33 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/LastAttemptAccumulator.scala @@ -0,0 +1,984 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +import scala.math.Ordering.Implicits._ +import scala.reflect.ClassTag +import scala.util.control.NonFatal + +import org.apache.spark.SparkContext +import org.apache.spark.internal.{LogEntry, Logging, LogKey, LogKeys} +import org.apache.spark.rdd.RDD +import org.apache.spark.scheduler.TaskInfo + +/* + * Last Attempt Accumulators are Accumulators that track the value of a metric aggregated across + * the "last execution" that produced the values. "Last execution" can be defined as: + * - For RDDs: the last execution of a given RDD partition, in the latest Stage and Stage attempt + * that recomputed it. + * - Across RDDs: lastAttemptValueForRDDId, lastAttemptValueForRDDIds, lastAttemptValueForAllRDDs, + * lastAttemptValueForHighestRDDId let specify that only values from specific RDDs should be + * aggregated. 
+ * - For Spark SQL Execution: In SQLLastAttemptAccumulator, lastAttemptValueForDataset, + * lastAttemptValueForQueryExecution let specify that only values from the last SQL execution of + * a specific Dataset (or QueryExecution) should be aggregated. + * + * In specific situations the last attempt value cannot be computed. This is both because of known + * specific user actions (e.g. mixing driver updates with task updates), and because the + * accumulator performs (and logs) various internal sanity checks and bails out if it detects an + * unexpected situation. Therefore, all the lastAttempt methods return an Option[OUT], where None + * means that it has bailed out. + * + * Updates to the accumulator from completed Tasks are merged in mergeLastAttempt, called from + * DAGScheduler.updateAccumulators, called from DAGScheduler.handleTaskCompletion in the single + * threaded DAGScheduler event loop. Therefore, we don't need to worry about concurrency control + * when updating the accumulator values. However, reading of the last attempt value can potentially + * be done concurrently, so we use synchronization. When there is normally no contention, JVM + * synchronization should be very low overhead. + * + * In order to be able to provide last attempt value, we need to keep track of partial metric + * values, so that after a partial re-attempt the partial value can be updated, and then + * re-aggregated. + * There are various sources of re-attempts that we have to track: + * + * 1. Spark Core. + * ============== + * - Updates from failed tasks are filtered in Task.collectAccumulatorUpdates before they are + * even passed back to the driver. We don't need to worry about them here. + * - We should not get results from two successful attempts of a Task in the same Stage attempt. + * TaskSetManager.handleSuccessfulTask ensures that. + * - Therefore we only need to track Stage retries. 
The Last Attempt Metric will aggregate the + * metric value of a given RDD partition from the last attempt of the Stage with the highest + * stageId. + * Normally recomputation creates a new stageAttemptId in the same Stage, but there can also + * be multiple new Stages due to: + * - In AQE, a materialized QueryStage is submitted as a new Stage, which would normally get + * skipped, as it is already materialized. However, if results of that stage have been lost, + * the recomputation will happen in that Stage. + * - If the same Dataset with the same QueryExecution and same executedPlan is reused for + * another execution (e.g. again calling collect()). All map stages should be materialized, + * so like with AQE, they should be skipped, unless the results have been lost. Then, + * recomputation will happen in that Stage. The result stage computing the action will be + * fully re-executed. + * - Due to the async nature of cancellation, there can be tasks from previous attempts that + * arrive later than the last attempt. Therefore, we need to track and compare stageId and + * stageAttemptId of every computed RDD partition, in order to discard latecomers. + * + * 2. Spark SQL. + * ============= + * LastAttemptAccumulator offers simple tracking of the last SQL execution, by assuming that + * the last execution will be in the scope of an RDD with the highest id, and using + * [[lastAttemptValueForHighestRDDId]]. See SQLLastAttemptAccumulator for more possibilities + * of tracking SQL execution. + * + * Simple last SQL execution tracking + * ---------------------------------- + * Whenever an AQE replan happens, or a repeated execution is submitted, there will be a new + * RDD created for that execution. If AQE creates a new plan, it always uses it and cancels + * the previous one. So, aggregating the metric updates from the RDD with the highest id + * should correspond to the last execution and the latest AQE plan. + * This has some limitations, e.g. 
doesn't work if the same metric is used in multiple places + * in the query plan, and we want all occurrences to be aggregated together. + * It also wouldn't work if a SparkPlan splits its execution into multiple RDDs. This for example + * happens in BroadcastNestedLoopJoinExec with matchedStreamRows and notMatchedBroadcastRows. + * One can use this simple last attempt tracking by using [[lastAttemptValueForHighestRDDId]]. + * + * 3. Driver only updates. + * ======================= + * Sometimes the metric is manipulated directly from the driver, not from within a Task. + * It can be either explicit by user code, or implicit by Catalyst Optimizer, for example + * ConvertToLocalRelation rule, folding a piece of the plan by evaluating it manually on the + * driver. + * When this happens, LastAttemptAccumulator has no information to reason about what was the + * last execution. If the only metric updates are coming from the driver, it assumes that these are + * the "last attempt". If there are both updates from executors and from the driver, it bails out. + * + * Implementation + * ============== + * To track the last attempts, we track a map of metric values per RDD id: + * - Map[RddId, LastAttemptRDDVals[PARTIAL]] + * + * In LastAttemptRDDVals we track an Array of per RDD partition partial merge values, together with + * the stageId and stageAttemptId and taskAttemptNumber to record task execution. + * We also track the RDD id, RDDScope id and last SQL execution id updating that RDD. + * + * Normally to merge partial values, two full Accumulators are used. However, accumulator classes + * that support Last Attempt have to implement partialMerge which merges PARTIAL type. + * This is used to have more compact representation, as PARTIAL can be e.g. a primitive type as + * opposed to a full AccumulatorV2 object instance.
+ */ + + +private class LastAttemptRDDVals[@specialized T]( + val rddId: Int, + val rddScopeId: Option[String], + // Array of partial metric values, indexed by RDD partition id. + // Metric updates to a given RDD partition can come from different stageAttempts if a retry + // happens while a Job with the Stage is running (a downstream Stage within a Job detects + // missing blocks and triggers recompute), or from different Stages, if a retry happens later + // (a new Job is submitted that depends on data from the RDD, if it finds it's missing it will + // recompute it in a new Stage). + // If a missing output is detected in a Stage while the stage is still running (e.g. executor + // is lost or decommissioned while the stage is running, and loses the output of some already + // finished tasks), a new Task with new taskAttemptNumber will be started for that Task. + // There may be multiple Tasks with different taskAttemptNumbers running in parallel due to + // speculation, but DAGScheduler guarantees that only one of them will reach metrics reporting, + // so it doesn't have to be dealt with here. + // + // There may be partitions that are either not computed at all (for example, due to early stop + // in take/limit), or AQE task coalescing may be visible as an update of the partition id of + // the first partition of the coalesced range. AQE guarantees that if these are retried, they + // will be coalesced in the same ranges, so update the same values. + // Whether a partition has been computed is tracked by [[computedBitmap]] below; the value at + // its slot in [[partitionPartialVals]] is undefined (typically the zero of T) for uncomputed + // partitions. + // + // Arrays of primitive types are more memory efficient than an array of objects due to + // references, object headers and paddings overheads. + // The `@specialized` annotation should make scala specialize it to use primitive array instead + // of boxed objects. 
+ val partitionPartialVals: Array[T]) + { + + import LastAttemptRDDVals.EMPTY_ID + + // In a case of repeated execution of the same QueryExecution and reuse of the SparkPlan + // (for example multiple `collect()` on the same Dataset), a new RDD may be executed in the same + // RDDOperationScope for the new execution. Hence, we can have multiple RDDs with the same + // RDDOperationScope, coming from different SQL executions and we should only count the last one. + // However, it may also be an old RDD that is reused in the new execution, but needs to be + // partially recomputed because part of it is missing. In that case, the last attempt value needs + // to still be aggregated over the whole RDD, because the whole RDD is used in the new execution. + // Note that this only applies per RDDOperationScope/SparkPlan, because other plans in the same + // new execution may have reused their RDD in whole, and hence have the last SQL executionId + // come from an earlier execution. + // Note: This doesn't work in case a user concurrently executed multiple actions on the same + // Dataset, resulting in multiple concurrent executions trying to compute the same RDD. This + // however should not happen in practice and would likely produce other unexpected effects. + var lastSqlExecutionId: Option[Long] = None + + // Common (stageId, stageAttemptId, taskAttemptNumber) shared by the majority of computed + // partitions. In the common case (no stage retries), every computed partition has the same + // attempt tuple, so we store it once at the RDD level instead of allocating three N-sized int + // arrays. The values are set on the first update and never changed; partitions whose attempt + // differs are recorded in the override arrays below. + // EMPTY_ID until the first update. 
+ private var commonStageId: Int = EMPTY_ID + private var commonStageAttemptId: Int = EMPTY_ID + private var commonTaskAttemptNumber: Int = EMPTY_ID + + // Bitmap of partitions that have been computed, one bit per partition packed into longs. + // A bit is set when a partition receives its first update; a partition with a clear bit has not + // been computed (e.g. early stop in take/limit, AQE task coalescing). + // Reads of an individual long are atomic on 64-bit JVMs, matching the loose concurrency + // semantics of the original per-partition int arrays. + private val computedBitmap: Array[Long] = + new Array[Long]((partitionPartialVals.length + 63) >>> 6) + + // Per-partition override arrays for each component of the attempt tuple. Each is allocated + // lazily and independently the first time some partition's value for that component diverges + // from the common value; until then the field is null and no per-partition state is kept for + // that component. Once allocated, an array is sized [[numPartitions]]: entries equal to + // EMPTY_ID mean "match the common value" and any other value is the per-partition override. + // This way: + // - RDDs without retries pay zero per-partition allocations (all three fields stay null). + // - A pure stage retry (new stageAttemptId, same stageId, taskAttemptNumber resets to 0) + // allocates only [[overrideStageAttemptIds]]. + // - A mid-stage retry (executor lost, some tasks restart with a higher taskAttemptNumber) + // allocates only [[overrideTaskAttemptNumbers]]. + // - Whole-stage cross-Stage retry (new stageId) allocates [[overrideStageIds]] too. + // + // Concurrency: update() is called only from the DAGScheduler scheduler loop. Some readers of + // the state can run concurrently (e.g. logAccumulatorState formatting). 
The fields are + // declared @volatile, and the new array is fully populated before the field is assigned, so a + // reader either sees null (use common) or sees an array whose Array.fill initialization is + // visible. In-place element writes for subsequent overrides are plain ints; readers may see + // them eventually, matching the loose semantics of the original per-partition int arrays. + @volatile private var overrideStageIds: Array[Int] = null + @volatile private var overrideStageAttemptIds: Array[Int] = null + @volatile private var overrideTaskAttemptNumbers: Array[Int] = null + + def numPartitions: Int = partitionPartialVals.length + + def isEmptyAt(partitionId: Int): Boolean = { + val word = computedBitmap(partitionId >>> 6) + ((word >>> (partitionId & 63)) & 1L) == 0L + } + + private def setComputedBit(partitionId: Int): Unit = { + val idx = partitionId >>> 6 + computedBitmap(idx) = computedBitmap(idx) | (1L << (partitionId & 63)) + } + + /** + * Records a new value for one component (stageId / stageAttemptId / taskAttemptNumber) of the + * attempt tuple at `partitionId`, allocating the override array on first divergence. Returns + * the array reference the caller should write back to the @volatile field - either a freshly + * allocated and populated array (first override for this component) or the existing array + * after an in-place update. Once the array exists, the value is always written, even when it + * matches the common value - lookupComponent returns it correctly either way. 
+ */ + private def setOverrideComponent( + array: Array[Int], + partitionId: Int, + value: Int, + common: Int): Array[Int] = { + if (array == null) { + if (value == common) null + else { + val newArr = Array.fill(partitionPartialVals.length)(EMPTY_ID) + newArr(partitionId) = value + newArr + } + } else { + array(partitionId) = value + array + } + } + + /** Reads one component's value at `partitionId`, falling back to `common` when the override + * array is null or the entry is still EMPTY_ID (the slot was either not yet written, or was + * initialized to EMPTY_ID and never overwritten because the partition's value matched the + * common when the array was first allocated for a different partition). */ + private def lookupComponent(array: Array[Int], partitionId: Int, common: Int): Int = { + if (array == null) common + else { + val v = array(partitionId) + if (v == EMPTY_ID) common else v + } + } + + def update(partialValue: AccumulatorPartialVal[T]): Unit = { + val partId = partialValue.rddPartitionId + if (commonStageId == EMPTY_ID) { + commonStageId = partialValue.stageId + commonStageAttemptId = partialValue.stageAttemptId + commonTaskAttemptNumber = partialValue.taskAttemptNumber + } + partitionPartialVals(partId) = partialValue.partialMergeVal + setComputedBit(partId) + overrideStageIds = setOverrideComponent( + overrideStageIds, partId, partialValue.stageId, commonStageId) + overrideStageAttemptIds = setOverrideComponent( + overrideStageAttemptIds, partId, partialValue.stageAttemptId, commonStageAttemptId) + overrideTaskAttemptNumbers = setOverrideComponent( + overrideTaskAttemptNumbers, partId, partialValue.taskAttemptNumber, commonTaskAttemptNumber) + lastSqlExecutionId = partialValue.sqlExecutionId + } + + def partialValueAt(partId: Int): AccumulatorPartialVal[T] = { + var sId = EMPTY_ID + var saId = EMPTY_ID + var tan = EMPTY_ID + if (!isEmptyAt(partId)) { + sId = lookupComponent(overrideStageIds, partId, commonStageId) + saId = 
lookupComponent(overrideStageAttemptIds, partId, commonStageAttemptId) + tan = lookupComponent(overrideTaskAttemptNumbers, partId, commonTaskAttemptNumber) + } + AccumulatorPartialVal( + partialMergeVal = partitionPartialVals(partId), + rddId = rddId, + rddPartitionId = partId, + rddNumPartitions = partitionPartialVals.length, + rddScopeId = rddScopeId, + stageId = sId, + stageAttemptId = saId, + taskAttemptNumber = tan, + sqlExecutionId = lastSqlExecutionId) + } + + override def toString: String = { + val n = numPartitions + val partVals = new StringBuilder("[") + val sIds = new StringBuilder("[") + val saIds = new StringBuilder("[") + val tans = new StringBuilder("[") + var i = 0 + while (i < n) { + if (i > 0) { + partVals.append(',') + sIds.append(',') + saIds.append(',') + tans.append(',') + } + partVals.append(partitionPartialVals(i)) + val pv = partialValueAt(i) + sIds.append(pv.stageId) + saIds.append(pv.stageAttemptId) + tans.append(pv.taskAttemptNumber) + i += 1 + } + partVals.append(']') + sIds.append(']') + saIds.append(']') + tans.append(']') + s"""LastAttemptVal( + | rddId=$rddId, + | rddScopeId=$rddScopeId, + | lastSqlExecutionId=$lastSqlExecutionId, + | partitionPartialVals=$partVals, + | stageIds=$sIds, + | stageAttemptIds=$saIds, + | taskAttemptNumbers=$tans + |)""".stripMargin + } +} + +private object LastAttemptRDDVals { + // EMPTY_ID indicates "no attempt recorded": used as the initial value of the common + // (stageId, stageAttemptId, taskAttemptNumber) before any update, and as the value returned + // by partialValueAt for partitions that have not been computed. 
+ val EMPTY_ID: Int = -1 + + def apply[@specialized T]( + rddId: Int, + rddScopeId: Option[String], + numPartitions: Int)(implicit ct: ClassTag[T]): LastAttemptRDDVals[T] = { + new LastAttemptRDDVals[T](rddId, rddScopeId, new Array[T](numPartitions)) + } + + def createFromFirstUpdate[@specialized T]( + update: AccumulatorPartialVal[T])(implicit ct: ClassTag[T]): LastAttemptRDDVals[T] = { + val newVal = LastAttemptRDDVals[T]( + rddId = update.rddId, + rddScopeId = update.rddScopeId, + update.rddNumPartitions) + newVal.update(update) + newVal + } +} + +private class LastAttemptMap[K, V] { + // Map used to keep metric updates, keyed by RDD id or RDD scope id, backed by a List. + // In the majority of cases (when there are no stage retries and no AQE replanning + // cancelling already running stages), there will be only one key, so a list backed map + // should have less overhead. + // + // Accumulators are modified only from DAGScheduler.updateAccumulators -> mergeLastAttempt, + // which is running from a single thread (scheduling loop), so no concurrency control is needed + // for updates. Read accesses to an immutable list should use a consistent state without extra + // synchronization. 
+ + @volatile private var map: List[(K, V)] = Nil + + def contains(key: K): Boolean = map.exists(_._1 == key) + + def get(key: K): Option[V] = map.collectFirst { case (k, v) if k == key => v } + + def put(key: K, value: V): Unit = synchronized { + map = (key, value) :: map.filterNot(_._1 == key) + } + + def keys: Iterable[K] = map.map(_._1) + def values: Iterable[V] = map.map(_._2) + def isEmpty: Boolean = map.isEmpty + def nonEmpty: Boolean = map.nonEmpty + def clear(): Unit = synchronized { map = Nil } + + override def toString: String = map + .map(elem => s"${elem._1} -> ${elem._2}").mkString("LastAttemptMap {\n", ",\n", "\n}") +} + +private case class AccumulatorPartialVal[PARTIAL]( + partialMergeVal: PARTIAL, + rddId: Int, + rddPartitionId: Int, + rddNumPartitions: Int, + rddScopeId: Option[String], + stageId: Int, + stageAttemptId: Int, + taskAttemptNumber: Int, + sqlExecutionId: Option[Long] +) { + override def toString: String = { + s"""AccumulatorPartialVal( + | partialMergeVal=$partialMergeVal, + | rddId=$rddId, + | rddPartitionId=$rddPartitionId, + | rddNumPartitions=$rddNumPartitions, + | rddScopeId=$rddScopeId, + | stageId=$stageId, + | stageAttemptId=$stageAttemptId, + | taskAttemptNumber=$taskAttemptNumber, + | sqlExecutionId=$sqlExecutionId + |)""".stripMargin + } + + /** Tuple of stage id, stage attempt id and taskAttemptNumber, defining the order of attempts. */ + val attempt: (Int, Int, Int) = (stageId, stageAttemptId, taskAttemptNumber) +} + +/** + * A trait that can be mixed into a subclass of [[AccumulatorV2]] to track the "logical" + * value of the "last attempt" of the execution using the accumulator - aggregated from the last + * attempts of any Task that calculated some RDD partitions and used this accumulator, and + * discarding any values coming from earlier attempts that have been recomputed. 
+ * If the accumulator is used by multiple RDDs, the last attempt value is tracked separately for + * each, and can be retrieved for each or all of them separately, see lastAttemptValueForX methods. + * If the accumulator is used directly on the Spark Driver using [[AccumulatorV2#add]], + * that value is considered the last attempt value. + * If the accumulator was both used in Tasks and updated directly on the driver, it can't determine + * what should be considered the last attempt, and lastAttemptValueForX methods will return None. + * + * Contract for driver-only updates: + * A driver-side value (set via [[AccumulatorV2#add]] on the driver, outside any Task) is only + * returned by methods that do not narrow by RDD, namely [[lastAttemptValueForAllRDDs]] and + * [[lastAttemptValueForHighestRDDId]]. Methods that narrow to specific RDDs or RDD scopes + * ([[lastAttemptValueForRDDId]], [[lastAttemptValueForRDDIds]], [[lastAttemptValueForRDDScopes]]) + * return the zero value when a driver-only value is present, because a driver-side update cannot + * be attributed to any particular RDD or scope. + * + * [[LastAttemptAccumulator]] is not reset by the [[AccumulatorV2#reset]] method implementation, + * and its state is not copied by the [[AccumulatorV2#copy]] method implementation, and it should + * not be serialized to the Executors. The internal state should only be initialized by the + * [[initializeLastAttemptAccumulator]] method on the "main" instance of the accumulator, that was + * created and registered with [[AccumulatorContext]] with [[AccumulatorV2#register]]. All the + * interfaces of [[LastAttemptAccumulator]]: [[mergeLastAttempt]] (used only by DAGScheduler) and + * lastAttemptValueForX, [[logAccumulatorState]] (used by the using code) should only be invoked on + * that instance, on the Spark Driver. + * + * The [[LastAttemptAccumulator]] is not thread-safe. [[mergeLastAttempt]] should only be used by + * DAGScheduler, by the scheduler thread. 
Retrieving the value using lastAttemptValueForX while + * it is concurrently updated (execution is running) can produce some inconsistencies, but should + * not crash. + * If an RDD using the [[LastAttemptAccumulator]] is used concurrently by multiple actions that + * all try to recompute it, it may produce unexpected results and the semantics of what is "last + * attempt" become ambiguous. This should not be done in practice, and will likely result in more + * unexpected behaviours in Spark. + * + * Implementations must implement [[partialMergeVal]] and [[partialMerge]] methods operating on + * PARTIAL type. In regular [[AccumulatorV2]] implementations, the [[AccumulatorV2]] object + * itself holds the intermediate value of the accumulator, and [[AccumulatorV2#merge]] method is + * used to merge these objects together. [[LastAttemptAccumulator]] needs to keep track of partial + * values of every partition of every RDD that used the accumulator, and holding a full + * [[AccumulatorV2]] object for each would have a high overhead. Therefore, an implementation should + * be able to return a PARTIAL value from [[partialMergeVal]] that represents an intermediate + * mergeable value, and provide a [[partialMerge]] method that can merge that value into the + * accumulator. + * Implementations must also implement an [[isMergeable]] method that checks if the other + * [[AccumulatorV2]] is of a compatible type to be merged with this one using [[partialMergeVal]]. + * In regular [[AccumulatorV2]] implementations, this check is normally done inside the + * [[AccumulatorV2#merge]] method, which is not used here. + * + * If an implementation is used to keep user data in the accumulator, it should override + * [[accumulatorStoresUserData]] to return true, to ensure correct structured logging annotation. + * Otherwise it should override it to return false.
+ */ +trait LastAttemptAccumulator[IN, OUT, PARTIAL] extends Logging { + this: AccumulatorV2[IN, OUT] => + + // For every RDD that participated in the computation of this accumulator, keep the partial + // value of the accumulator for the latest stage and stage attempt that computed it. + // Keyed by rdd.id. + // Only kept and accessed on the driver, in the instance of the LastAttemptAccumulator that was + // created and registered with AccumulatorContext with AccumulatorV2.register(). + // Should not be copied / reset by the implementation of copy() / reset() functions. + // Transient: only needed on the driver and doesn't need to be serialized. + @transient + private var lastAttemptRddsMap: LastAttemptMap[Int, LastAttemptRDDVals[PARTIAL]] = _ + + // ClassTag for PARTIAL, captured at initialization time. + @transient private var partialClassTag: ClassTag[PARTIAL] = _ + + // Metric value set directly on the driver, not from within a task. + // Only kept and accessed on the driver, in the instance of the LastAttemptAccumulator that was + // created and registered with AccumulatorContext with AccumulatorV2.register(). + // Should not be copied / reset by the implementation of copy() / reset() functions. + // Transient: only needed on the driver and doesn't need to be serialized. + @transient + private var lastAttemptDirectDriverValue: Option[OUT] = _ + + // Flipped to true if unexpected metrics updates are received and we can no longer reason + // about the last attempt. + // Should not be copied / reset by the implementation of copy() / reset() functions. + // Transient: only needed on the driver and doesn't need to be serialized. + @transient + protected var lastAttemptAccumulatorInvalid: Boolean = false + + // Indicates that the LastAttemptAccumulator has been initialized. + // It is set to true by initializeLastAttemptAccumulator() and checked in assertValid(). + // Should not be copied / reset by the implementation of copy() / reset() functions.
+ // Transient: only needed on the driver and doesn't need to be serialized. + @transient + protected var lastAttemptAccumulatorInitialized: Boolean = false + + /** Reset the state of the last attempt accumulator, discarding all the past attempts, and + * making it valid again if it was invalidated. */ + def resetLastAttemptAccumulator(): Unit = try { + lastAttemptRddsMap.clear() + lastAttemptDirectDriverValue = None + lastAttemptAccumulatorInvalid = false + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in resetLastAttemptAccumulator", + exception = Some(e)) + } + + def initializeLastAttemptAccumulator()(implicit ct: ClassTag[PARTIAL]): Unit = try { + assert(isAtDriverSide) + assert(!lastAttemptAccumulatorInitialized) + assert(!lastAttemptAccumulatorInvalid) + assert(lastAttemptRddsMap == null) + assert(lastAttemptDirectDriverValue == null) + partialClassTag = ct + lastAttemptRddsMap = new LastAttemptMap[Int, LastAttemptRDDVals[PARTIAL]] + lastAttemptDirectDriverValue = None + lastAttemptAccumulatorInitialized = true + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in initializeLastAttemptAccumulator", + exception = Some(e)) + } + + private def accumulatorId: Long = { + // This can throw if this is a copy/serialized accumulator, + // not the instance registered with AccumulatorContext. + // Catch it so we can safely use it for logging in unexpected situations. + try { + this.id + } catch { + case NonFatal(e) => + logWarning(log"Unexpected exception in getting accumulator id", e) + -1L // needs to be a long for LogKeys.ACCUMULATOR_ID + } + } + + /** Log entry to log debug information about the internal state of the accumulator. 
*/ + def logAccumulatorState: LogEntry = try { + log"""LastAttemptAccumulator id=${MDC(LogKeys.ACCUMULATOR_ID, accumulatorId)}: + |Invalidated: ${MDC(LogKeys.LAST_ATTEMPT_ACC_INVALIDATE, lastAttemptAccumulatorInvalid)}. + |Direct driver value: ${MDC(logKeyAccumulatorState, lastAttemptDirectDriverValue)}. + |Value: ${MDC(logKeyAccumulatorState, value)}. + |lastAttemptRddsMap: + |${MDC(logKeyAccumulatorState, lastAttemptRddsMap)}.""" + .stripMargin + } catch { + case NonFatal(e) => + logWarning(log"Unexpected exception in logAccumulatorState", e) + log"" + } + + private def logAccumulatorUpdate( + newAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None, + oldAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None): LogEntry = try { + log"""Old partial RDD value: ${MDC(logKeyAccumulatorState, oldAccumPartialValue)}. + |New partial RDD value: ${MDC(logKeyAccumulatorState, newAccumPartialValue)}.""" + .stripMargin + } catch { + case NonFatal(e) => + logWarning(log"Unexpected exception in logAccumulatorUpdate", e) + log"" + } + + private def unexpectedLastAttemptMetricUpdate( + invalidate: Boolean, + reason: String, + exception: Option[Throwable] = None, + newAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None, + oldAccumPartialValue: Option[AccumulatorPartialVal[PARTIAL]] = None): Unit = { + val logEntry = + log"""Unexpected last attempt tracking for accumulator ${ + MDC(LogKeys.ACCUMULATOR_ID, accumulatorId)}. + |Invalidate: ${MDC(LogKeys.LAST_ATTEMPT_ACC_INVALIDATE, invalidate)}. + |Reason: ${MDC(LogKeys.LAST_ATTEMPT_ACC_UNEXPECTED_REASON, reason)}. 
+ |""".stripMargin + + log"State:\n" + logAccumulatorState + + log"Update:\n" + logAccumulatorUpdate(newAccumPartialValue, oldAccumPartialValue) + exception match { + case Some(e) => logWarning(logEntry, e) + case None => logWarning(logEntry) + } + if (invalidate) { + lastAttemptAccumulatorInvalid = true + } + if (Utils.isTesting && lastAttemptAccumulatorInitialized && exception.isDefined) { + // If this is a test, rethrow the exception. + // (Rethrow only if lastAttemptAccumulatorInitialized. In some tests, we check for proper + // graceful handling of unexpected exceptions in accumulators that are not properly + // initialized, so we don't want to throw there.) + throw exception.get + } + } + + protected def unexpectedLastAttemptMetricOperation( + invalidate: Boolean, + reason: String, + exception: Option[Throwable] = None): Unit = { + // subclasses don't have visibility of private class AccumulatorPartialVal. + unexpectedLastAttemptMetricUpdate( + invalidate = invalidate, + reason = reason, + exception = exception, + newAccumPartialValue = None, + oldAccumPartialValue = None) + } + + /** Set of assertions that should always hold for a valid [[LastAttemptAccumulator]]. */ + protected def assertValid(): Unit = { + assert(lastAttemptAccumulatorInitialized) + assert(!lastAttemptAccumulatorInvalid) + assert(isAtDriverSide) + assert(metadata != null) + assert(!metadata.countFailedValues) + assert(lastAttemptDirectDriverValue.isEmpty || lastAttemptRddsMap.isEmpty) + } + + /** + * Accumulator subclasses where metric values can contain user data (for example, maximum of + * processed values, observable metrics) as opposed to system measurements (for example, count + * of processed rows) should return true to ensure correct structured logging annotation. 
+ */ + protected def accumulatorStoresUserData: Boolean + + protected def logKeyAccumulatorState: LogKey = { + if (accumulatorStoresUserData) { + LogKeys.LAST_ATTEMPT_ACC_USER_METRIC + } else { + LogKeys.LAST_ATTEMPT_ACC_SYSTEM_METRIC + } + } + + /** Return intermediate value of PARTIAL type that can be merged together by partialMerge. */ + protected def partialMergeVal: PARTIAL + + /** Merge together partial values of PARTIAL type returned by partialMergeVal. */ + protected def partialMerge(otherVal: PARTIAL): Unit + + /** Check if the other accumulator is mergeable with this one. */ + protected def isMergeable(other: AccumulatorV2[_, _]): Boolean + + /** + * Check if the value is set on the driver side, not from within a task. + * This must be called from `add` and `set` methods of any AccumulatorV2 subclass supporting + * last attempt metrics to set what the `value` of the metric is after the operation. + */ + protected def setValueIfOnDriverSide(value: OUT): Unit = try { + if (isAtDriverSide && lastAttemptAccumulatorInitialized && !lastAttemptAccumulatorInvalid) { + // Direct update on the driver, not from within a task. + // This gives little information about the source of the update, so we can't reason about + // "last attempt" if it's mixed with non-driver updates. + lastAttemptDirectDriverValue = Some(value) + if (lastAttemptRddsMap.nonEmpty) { + unexpectedLastAttemptMetricUpdate( + invalidate = true, + reason = "Incoming direct driver value while task updates exist") + } + } + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in setValueIfOnDriverSide", + exception = Some(e)) + } + + /** + * It needs Task and Stage information to reason about the last attempt. + * + * Called from a single thread in DAGScheduler, no synchronization needed. 
+ * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that + * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]]. + */ + private[spark] def mergeLastAttempt( + other: AccumulatorV2[_, _], + rdd: RDD[_], + taskInfo: TaskInfo, + stageId: Int, + stageAttemptId: Int, + localProperties: java.util.Properties): Unit = try { + implicit val ct: ClassTag[PARTIAL] = partialClassTag + if (lastAttemptAccumulatorInvalid) return + // Skip zero-value updates. They contribute nothing to the aggregate and can come + // from stages where the accumulator was present in the task closure but never incremented. + if (other.isZero) return + assertValid() + + if (!isMergeable(other)) { + // This should never happen. + unexpectedLastAttemptMetricUpdate( + invalidate = true, + "Merging accumulators of different types") + return + } + + // The partial value is read through partialMergeVal, which only exists on + // LastAttemptAccumulator instances. + if (!other.isInstanceOf[LastAttemptAccumulator[_, _, _]]) { + // This should never happen. + unexpectedLastAttemptMetricUpdate( + invalidate = true, + "Merging with accumulator which is not a LastAttemptAccumulator") + return + } + val lastAttemptOther = other + .asInstanceOf[LastAttemptAccumulator[IN, OUT, PARTIAL]] + + val update = AccumulatorPartialVal( + partialMergeVal = lastAttemptOther.partialMergeVal, + rddId = rdd.id, + rddPartitionId = taskInfo.partitionId, + rddNumPartitions = rdd.getNumPartitions, + rddScopeId = rdd.scope.map(_.id), + stageId = stageId, + stageAttemptId = stageAttemptId, + taskAttemptNumber = taskInfo.attemptNumber, + sqlExecutionId = + Option(localProperties.getProperty(SparkContext.SQL_EXECUTION_ID_KEY)).map(_.toLong)) + + if (lastAttemptDirectDriverValue.nonEmpty) { + unexpectedLastAttemptMetricUpdate(invalidate = true, + "Incoming task updates while direct driver value exists", + newAccumPartialValue = Some(update)) + return + } + + lastAttemptRddsMap.get(update.rddId) match { + case Some(oldRDDValue) => // This RDD was already seen.
+ val oldValue = oldRDDValue.partialValueAt(update.rddPartitionId) + + logTrace(log"mergeLastAttempt existing RDD update:\n" + + log"${MDC(logKeyAccumulatorState, oldRDDValue)}\n" + + logAccumulatorUpdate( + newAccumPartialValue = Some(update), oldAccumPartialValue = Some(oldValue))) + + // Check basic consistency + if (oldValue.rddNumPartitions != update.rddNumPartitions) { + unexpectedLastAttemptMetricUpdate( + invalidate = true, + reason = "RDD with changing number of partitions", + newAccumPartialValue = Some(update), + oldAccumPartialValue = Some(oldValue)) + return + } + if (oldValue.rddScopeId != update.rddScopeId) { + unexpectedLastAttemptMetricUpdate( + invalidate = true, + reason = "RDD with changing RDDOperationScope", + newAccumPartialValue = Some(update), + oldAccumPartialValue = Some(oldValue)) + return + } + + if (oldRDDValue.isEmptyAt(update.rddPartitionId)) { + // No previous attempt for this RDD partition. + oldRDDValue.update(update) + } else { + if (update.attempt > oldValue.attempt) { + // New last attempt for this RDD partition. + oldRDDValue.update(update) + } else if (update.attempt == oldValue.attempt) { + // Same attempt, should not happen. + unexpectedLastAttemptMetricUpdate( + invalidate = true, + reason = "Same stage, stageAttemptId and taskAttemptNumber reported multiple times", + newAccumPartialValue = Some(update), + oldAccumPartialValue = Some(oldValue)) + } + // else: Older attempt reported after newer attempt. Not fatal, discard it. + } + + case None => // First time we see this RDD. 
+ logTrace(log"mergeLastAttempt new RDD update:\n" + logAccumulatorUpdate( + newAccumPartialValue = Some(update), oldAccumPartialValue = None)) + val newVal = LastAttemptRDDVals.createFromFirstUpdate(update) + lastAttemptRddsMap.put(update.rddId, newVal) + } + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricUpdate( + invalidate = true, + reason = "Unexpected exception in mergeLastAttempt", + exception = Some(e)) + } + + /** Accumulates last attempt values from given RDD into an acc. */ + private def lastAttemptValueAggregateInternal(rddId: Int, acc: this.type) = { + // Note: even if the given RDD is not present, we can't tell if it executed but just never + // updated this accumulator, so we still report the zero value back. + for { + lastAttemptVal <- lastAttemptRddsMap.get(rddId) + partitionId <- lastAttemptVal.partitionPartialVals.indices + } { + // Some partitions may not be computed. + // May be because of operations like take. + // May be because of AQE coalescing executing tasks covering multiple partitions. + if (!lastAttemptVal.isEmptyAt(partitionId)) { + acc.partialMerge(lastAttemptVal.partitionPartialVals(partitionId)) + } + } + } + + /** + * Returns the last attempt value of this accumulator, aggregated from a set of RDDs. + * + * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that + * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]]. + * + * @return None if the last attempt value cannot be established, Some(value) otherwise. + */ + def lastAttemptValueForRDDIds(rddIds: Seq[Int]): Option[OUT] = try { + if (lastAttemptAccumulatorInvalid) return None + assertValid() + if (lastAttemptDirectDriverValue.isDefined) { + // A driver-only value cannot be attributed to any particular RDD or scope, so the + // zero value is returned instead of the driver value.
+ return Some(copyAndReset().asInstanceOf[this.type].value) + } + + val acc = copyAndReset().asInstanceOf[this.type] + rddIds.distinct.foreach(lastAttemptValueAggregateInternal(_, acc)) + Some(acc.value) + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in lastAttemptValueForRDDIds", + exception = Some(e)) + None + } + + /** + * Returns the last attempt value of this accumulator, aggregated from a specific RDD. + * + * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that + * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]]. + * + * @return None if the last attempt value cannot be established, Some(value) otherwise. + */ + def lastAttemptValueForRDDId(rddId: Int): Option[OUT] = try { + lastAttemptValueForRDDIds(Seq(rddId)) + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in lastAttemptValueForRDDId", + exception = Some(e)) + None + } + + /** + * Returns the last attempt value of this accumulator, aggregated from all RDDs that ever + * returned any values for it. + * + * If the metric was used directly on the driver, and was not used in any RDD execution, + * the driver value will be used instead. + * + * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that + * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]]. + * + * @return None if the last attempt value cannot be established, Some(value) otherwise.
+ */ + def lastAttemptValueForAllRDDs(): Option[OUT] = try { + if (lastAttemptAccumulatorInvalid) return None + assertValid() + if (lastAttemptDirectDriverValue.isDefined) return lastAttemptDirectDriverValue + lastAttemptValueForRDDIds(lastAttemptRddsMap.keys.toSeq) + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in lastAttemptValueForAllRDDs", + exception = Some(e)) + None + } + + /** + * Returns the last attempt value of this accumulator, aggregated from the RDD with the highest + * id that ever returned any values for it. + * + * If the metric was used directly on the driver, and was not used in any RDD execution, + * the driver value will be used instead. + * + * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that + * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]]. + * + * @return None if the last attempt value cannot be established, Some(value) otherwise. + */ + def lastAttemptValueForHighestRDDId(): Option[OUT] = try { + if (lastAttemptAccumulatorInvalid) return None + assertValid() + if (lastAttemptDirectDriverValue.isDefined) return lastAttemptDirectDriverValue + + // Read the key set once. Checking nonEmpty and then calling max would throw on an empty + // collection if resetLastAttemptAccumulator() cleared the map in between. + lastAttemptRddsMap.keys.maxOption match { + case Some(highestRddId) => lastAttemptValueForRDDId(highestRddId) + case None => + // Return zero value if there is no RDD execution recorded. + Some(copyAndReset().asInstanceOf[this.type].value) + } + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in lastAttemptValueForHighestRDDId", + exception = Some(e)) + None + } + + /** + * Returns the last attempt value of this accumulator, aggregated from RDDs with given scope ids. + * + * Should be used only on the Spark Driver, on the instance of [[LastAttemptAccumulator]] that + * was created and registered in [[AccumulatorContext]] by [[AccumulatorV2#register]].
+ * + * @return None if the last attempt value cannot be established, Some(value) otherwise. + */ + def lastAttemptValueForRDDScopes(rddScopeIds: Seq[String]): Option[OUT] = try { + if (lastAttemptAccumulatorInvalid) return None + assertValid() + if (lastAttemptDirectDriverValue.isDefined) { + // A driver-only value cannot be attributed to any particular RDD or scope, so the + // zero value is returned instead of the driver value. + return Some(copyAndReset().asInstanceOf[this.type].value) + } + val scopesLookup = rddScopeIds.toSet + val matchingRDDs = lastAttemptRddsMap.values.filter { rddVal => + rddVal.rddScopeId.exists(scopesLookup.contains) + }.toSeq + // When multiple RDDs share the same scope (e.g. repeated Dataset.collect() calls create + // new wrapper RDDs in the same scope, or BroadcastNestedLoopJoin executing the probe side + // twice), only aggregate the latest one per scope, identified by the highest RDD id. + // RDD ids are globally monotonic, so the highest id is the latest. + val rddIds = matchingRDDs.groupBy(_.rddScopeId).values.map(_.maxBy(_.rddId).rddId).toSeq + lastAttemptValueForRDDIds(rddIds) + } catch { + case NonFatal(e) => + unexpectedLastAttemptMetricOperation( + invalidate = true, + reason = "Unexpected exception in lastAttemptValueForRDDScopes", + exception = Some(e)) + None + } + + /** Visible for testing.
*/ + def getDirectDriverValue: Option[OUT] = { + lastAttemptDirectDriverValue + } + + /** Visible for testing */ + def getHighestRDDId: Option[Int] = { + // maxOption reads the key set once and yields None for an empty map. + lastAttemptRddsMap.keys.maxOption + } + + /** Visible for testing */ + def getNumRDDs: Int = { + lastAttemptRddsMap.keys.size + } + + /** Visible for testing */ + def getValid: Boolean = { + !lastAttemptAccumulatorInvalid + } +} diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index d22e14d992655..54f0ec6505b81 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import com.google.common.util.concurrent.ThreadFactoryBuilder -import org.apache.spark.SparkException +import org.apache.spark.{SparkException, SparkThrowable} private[spark] object ThreadUtils { @@ -358,10 +358,26 @@ private[spark] object ThreadUtils { def awaitResult[T](awaitable: Awaitable[T], atMost: Duration): T = { SparkThreadUtils.awaitResult(awaitable, atMost) } + + @throws(classOf[SparkException]) + def awaitResult[T]( + awaitable: Awaitable[T], + atMost: Duration, + preserveSparkThrowable: Boolean): T = { + SparkThreadUtils.awaitResult(awaitable, atMost, preserveSparkThrowable) + } // scalastyle:on awaitresult @throws(classOf[SparkException]) def awaitResult[T](future: JFuture[T], atMost: Duration): T = { + awaitResult(future, atMost, preserveSparkThrowable = false) + } + + @throws(classOf[SparkException]) + def awaitResult[T]( + future: JFuture[T], + atMost: Duration, + preserveSparkThrowable: Boolean): T = { try { atMost match { case Duration.Inf => future.get() @@ -370,6 +386,16 @@ private[spark] object ThreadUtils { } catch { case e: SparkFatalException => throw e.throwable + // JFuture.get() wraps exceptions in ExecutionException.
Unwrap and check if the + // cause carries a structured condition (SparkThrowable) to preserve the SQL state. + case e: ExecutionException + if preserveSparkThrowable + && e.getCause.isInstanceOf[SparkThrowable] + && e.getCause.asInstanceOf[SparkThrowable].getCondition != null => + // Attach the caller's stack trace so it's not lost when re-throwing from a worker thread. + e.getCause.addSuppressed( + new SparkException("Exception thrown in awaitResult", cause = null)) + throw e.getCause case NonFatal(t) if !t.isInstanceOf[TimeoutException] => throw new SparkException("Exception thrown in awaitResult: ", t) @@ -407,6 +433,11 @@ private[spark] object ThreadUtils { } } + /** See the overloaded [[parmap]] for full documentation. */ + def parmap[I, O](in: Seq[I], prefix: String, maxThreads: Int)(f: I => O): Seq[O] = { + parmap(in, prefix, maxThreads, preserveSparkThrowable = false)(f) + } + /** * Transforms input collection by applying the given function to each element in parallel fashion. * Comparing to the map() method of Scala parallel collections, this method can be interrupted @@ -419,13 +450,19 @@ private[spark] object ThreadUtils { * @param in - the input collection which should be transformed in parallel. * @param prefix - the prefix assigned to the underlying thread pool. * @param maxThreads - maximum number of thread can be created during execution. + * @param preserveSparkThrowable if true, re-throw exceptions that already carry a structured + * error class (SparkThrowable) instead of wrapping them in a generic SparkException. * @param f - the lambda function will be applied to each element of `in`. * @tparam I - the type of elements in the input collection. * @tparam O - the type of elements in resulted collection. * @return new collection in which each element was given from the input collection `in` by * applying the lambda function `f`. 
*/ - def parmap[I, O](in: Seq[I], prefix: String, maxThreads: Int)(f: I => O): Seq[O] = { + def parmap[I, O]( + in: Seq[I], + prefix: String, + maxThreads: Int, + preserveSparkThrowable: Boolean)(f: I => O): Seq[O] = { val pool = newForkJoinPool(prefix, maxThreads) try { implicit val ec: ExecutionContextExecutor = ExecutionContext.fromExecutor(pool) @@ -433,7 +470,7 @@ private[spark] object ThreadUtils { val futures = in.map(x => Future(f(x))) val futureSeq = Future.sequence(futures) - awaitResult(futureSeq, Duration.Inf) + awaitResult(futureSeq, Duration.Inf, preserveSparkThrowable) } finally { pool.shutdownNow() } diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index 9ce43d32c1b10..d59bcfc2bd131 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; +import java.util.BitSet; import java.util.LinkedList; import java.util.UUID; @@ -36,6 +37,8 @@ import org.apache.spark.executor.ShuffleWriteMetrics; import org.apache.spark.executor.TaskMetrics; import org.apache.spark.internal.config.package$; +import org.apache.spark.memory.MemoryConsumer; +import org.apache.spark.memory.MemoryMode; import org.apache.spark.memory.TestMemoryManager; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.serializer.JavaSerializer; @@ -852,6 +855,86 @@ public void testBoundedMergeWithInterleavedData() throws Exception { assertSpillFilesWereCleanedUp(); } + @Test + public void testBoundedMergeSnapshotIsolatedFromConcurrentSpill() throws Exception { + // Verifies the prepareBoundedMerge() seam contract: ctx.snapshot is a defensive + // copy frozen at prepare-time, isolated 
from any later mutation of the live + // spillWriters list. The test drives the worst-case scenario by direct sequencing: + // an external-trigger spill() (the route a sibling MemoryConsumer takes under + // memory pressure) appends a writer to live spillWriters AND rebinds + // readingIterator.upstream to read it -- the merger must consume that file exactly + // once via readingIterator, not twice via the snapshot. + final UnsafeExternalSorter sorter = newSorter(); + sorter.setSpillMergeFactor(2); + + final int numSpills = 4; + final int recordsPerSpill = 8; + final int totalSpilled = numSpills * recordsPerSpill; + final int inMemRecords = 5; + final int totalRecords = totalSpilled + inMemRecords; + + // Build numSpills spills with disjoint, interleaved keys. + for (int spill = 0; spill < numSpills; spill++) { + for (int j = 0; j < recordsPerSpill; j++) { + insertNumber(sorter, spill + j * numSpills); + } + sorter.spill(); + } + // Leave a few records in memory so readingIterator has unread data that a + // concurrent spill() can drain into a new spill file. + for (int j = 0; j < inMemRecords; j++) { + insertNumber(sorter, totalSpilled + j); + } + + // Phase 1: snapshot + publish readingIterator (production order). + UnsafeExternalSorter.BoundedMergerContext ctx = sorter.prepareBoundedMerge(); + assertNotNull(ctx.inMemIter, + "readingIterator should be published when inMemSorter has data"); + final int snapshotSizeBefore = ctx.snapshot.size(); + final int spillFilesBefore = spillFilesCreated.size(); + + // Phase 2: external-trigger spill. Routes through readingIterator.spill(): + // appends a writer to the live spillWriters AND rebinds readingIterator.upstream. 
+ final MemoryConsumer externalTrigger = + new MemoryConsumer(taskMemoryManager, MemoryMode.ON_HEAP) { + @Override + public long spill(long size, MemoryConsumer trigger) { + return 0; + } + }; + long bytesSpilled = sorter.spill(Long.MAX_VALUE, externalTrigger); + assertTrue(bytesSpilled > 0L, + "external-trigger spill must fire to exercise the seam contract"); + // Exactly one new spill file should have been produced by the external-trigger spill. + assertEquals(spillFilesBefore + 1, spillFilesCreated.size(), + "external-trigger spill should produce exactly one new spill file"); + // Defensive-copy invariant: the post-spill snapshot is unchanged. A future + // refactor that aliases ctx.snapshot to the live spillWriters field instead of + // copying it would fail this assertion. + assertEquals(snapshotSizeBefore, ctx.snapshot.size(), + "ctx.snapshot must be isolated from live spillWriters mutation"); + + // Phase 3: merge using the frozen snapshot. + UnsafeSorterIterator iter = ctx.merger.merge(ctx.snapshot, ctx.inMemIter); + + // Each input record must appear exactly once: no duplicates, no losses. + BitSet seen = new BitSet(totalRecords); + int count = 0; + while (iter.hasNext()) { + iter.loadNext(); + int v = Platform.getInt(iter.getBaseObject(), iter.getBaseOffset()); + assertTrue(v >= 0 && v < totalRecords, "record out of range: " + v); + assertFalse(seen.get(v), "duplicate record observed: " + v); + seen.set(v); + count++; + } + assertEquals(totalRecords, count, "wrong record count"); + assertEquals(totalRecords, seen.cardinality(), "missing records"); + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); + } + @Test public void testBoundedMergeWithDuplicateKeys() throws Exception { // Multiple spills contain identical keys. 
Verifies that all duplicates are diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index 05709c9bdd756..813de4132ab2d 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -17,9 +17,6 @@ package org.apache.spark -import java.lang.ref.WeakReference -import java.util.concurrent.TimeUnit - import scala.collection.mutable.HashSet import scala.util.Random @@ -96,18 +93,6 @@ abstract class ContextCleanerSuiteBase(val shuffleManager: Class[_] = classOf[So rdd } - /** Run GC and make sure it actually has run */ - protected def runGC(): Unit = { - val weakRef = new WeakReference(new Object()) - val startTimeNs = System.nanoTime() - System.gc() // Make a best effort to run the garbage collection. It *usually* runs GC. - // Wait until a weak reference object has been GCed - while (System.nanoTime() - startTimeNs < TimeUnit.SECONDS.toNanos(10) && weakRef.get != null) { - System.gc() - Thread.sleep(200) - } - } - protected def cleaner = sc.cleaner.get } diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index 15e150ab8b933..a0f17f8af3f33 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark +import java.lang.ref.WeakReference +import java.util.concurrent.TimeUnit + import scala.annotation.tailrec import org.scalactic.source.Position @@ -97,4 +100,26 @@ abstract class SparkFunSuite test(testNamePrefix + s" ${param._1}", testTags: _*)(testFun(param._2)) } } + + /** Run GC and make sure it actually has run. */ + protected def runGC(): Unit = { + val weakRef = new WeakReference(new Object()) + val startTimeNs = System.nanoTime() + System.gc() // Make a best effort to run the garbage collection. 
It *usually* runs GC. + // Wait until a weak reference object has been GCed + while (System.nanoTime() - startTimeNs < TimeUnit.SECONDS.toNanos(10) && weakRef.get != null) { + System.gc() + Thread.sleep(200) + } + } + + /** Run `body`; if it throws OutOfMemoryError, force a GC and retry once. */ + protected def retryOnOOM[T](body: => T): T = { + try body + catch { + case _: OutOfMemoryError => + runGC() + body + } + } } diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index 518a8c8b3d055..6d2c663a2588e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -23,10 +23,11 @@ import com.fasterxml.jackson.core.JsonParseException import org.json4s._ import org.json4s.jackson.JsonMethods -import org.apache.spark.{JsonTestUtils, SparkFunSuite} +import org.apache.spark.{JsonTestUtils, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master.{ApplicationInfo, RecoveryState, WorkerInfo} import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.util.Utils class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils { @@ -45,7 +46,7 @@ class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils { } test("writeApplicationDescription") { - val output = JsonProtocol.writeApplicationDescription(createAppDesc()) + val output = JsonProtocol.writeApplicationDescription(createAppDesc(), new SparkConf()) assertValidJson(output) assertValidDataInJson(output, JsonMethods.parse(JsonConstants.appDescJsonStr)) } @@ -105,6 +106,38 @@ class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils { assertValidDataInJson(output, JsonMethods.parse(JsonConstants.workerStateJsonStr)) } + test("SPARK-57098: secrets in executor command are redacted in worker JSON 
endpoint") { + val conf = new SparkConf() + val secretEnv = Map( + "HADOOP_CREDSTORE_PASSWORD" -> "topsecret", + "JAVA_HOME" -> "/usr/lib/jvm/default", + "AWS_SECRET_ACCESS_KEY" -> "aws-secret-value") + val secretJavaOpts = Seq( + "-Dspark.ssl.keyStorePassword=ssl-secret", + "-Dspark.executorEnv.PASSWORD=env-secret", + "-Xmx2g") + val cmd = new Command( + "mainClass", List("arg1"), secretEnv, Seq(), Seq(), secretJavaOpts) + val appDesc = new ApplicationDescription( + "name", Some(4), cmd, "appUiUrl", defaultResourceProfile) + + val output = JsonProtocol.writeApplicationDescription(appDesc, conf) + val commandStr = (output \ "command") match { + case JString(s) => s + case other => fail(s"Expected JString for 'command', got: $other") + } + + // Sensitive values are scrubbed. + assert(!commandStr.contains("topsecret")) + assert(!commandStr.contains("ssl-secret")) + assert(!commandStr.contains("env-secret")) + assert(!commandStr.contains("aws-secret-value")) + assert(commandStr.contains(Utils.REDACTION_REPLACEMENT_TEXT)) + // Non-sensitive values pass through. + assert(commandStr.contains("/usr/lib/jvm/default")) + assert(commandStr.contains("-Xmx2g")) + } + test("SPARK-46883: writeClusterUtilization") { val workers = Array(createWorkerInfo(), createWorkerInfo()) val activeApps = Array(createAppInfo()) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index cb1906679e550..829010179bda4 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -212,6 +212,10 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P SparkListenerApplicationEnd(2L) ) logFile2.setReadable(false, false) + // setReadable(false) is a no-op for root users since they bypass file + // permission checks. 
Skip the test in that case. + assume(!logFile2.canRead, "Test requires the file to be unreadable; " + + "skipping when running as root.") updateAndCheck(provider) { list => list.size should be (1) diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index d6f0bfd237e4d..d153800acf3ff 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -21,7 +21,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.Locale import org.apache.spark.{SparkConf, SparkFunSuite, SparkIllegalArgumentException} -import org.apache.spark.internal.config.IO_COMPRESSION_ZSTD_BUFFERPOOL_ENABLED +import org.apache.spark.internal.config.{IO_COMPRESSION_CODEC, IO_COMPRESSION_ZSTD_BUFFERPOOL_ENABLED} import org.apache.spark.util.Utils class CompressionCodecSuite extends SparkFunSuite { @@ -47,7 +47,9 @@ class CompressionCodecSuite extends SparkFunSuite { test("default compression codec") { val codec = CompressionCodec.createCodec(conf) - assert(codec.getClass === classOf[LZ4CompressionCodec]) + assert(codec.getClass.getName === + CompressionCodec.shortCompressionCodecNames( + IO_COMPRESSION_CODEC.defaultValueString)) testCodec(codec) } diff --git a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala index d74bc26999447..59c0a5085e93c 100644 --- a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala @@ -26,7 +26,7 @@ import scala.util.Random import org.scalatest.concurrent.Eventually._ -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkException, SparkFunSuite, SparkThrowable} class ThreadUtilsSuite extends SparkFunSuite { @@ -229,4 +229,90 @@ class ThreadUtilsSuite extends SparkFunSuite { 
assert(!t.isAlive) } } + + test("awaitResult preserves SparkThrowable when flag is true") { + import java.io.IOException + + val sparkThrowableEx = new RuntimeException("structured error") with SparkThrowable { + override def getCondition: String = "TEST_ERROR_CLASS" + override def getMessageParameters: java.util.Map[String, String] = + java.util.Collections.emptyMap() + } + + // With preserveSparkThrowable=true, SparkThrowable is re-thrown directly. + val f1 = Future { + throw sparkThrowableEx + }(ThreadUtils.sameThread) + val caught1 = intercept[RuntimeException] { + ThreadUtils.awaitResult(f1, 1.seconds, preserveSparkThrowable = true) + } + assert(caught1.isInstanceOf[SparkThrowable]) + assert(caught1.asInstanceOf[SparkThrowable].getCondition == "TEST_ERROR_CLASS") + assert(caught1.getSuppressed.nonEmpty) + + // With preserveSparkThrowable=false (default), SparkThrowable is wrapped in SparkException. + val f2 = Future { + throw sparkThrowableEx + }(ThreadUtils.sameThread) + val caught2 = intercept[SparkException] { + ThreadUtils.awaitResult(f2, 1.seconds) + } + assert(caught2.getCause.isInstanceOf[SparkThrowable]) + + // Plain exceptions are always wrapped regardless of the flag. 
+ val plainEx = new IOException("plain error") + val f3 = Future { + throw plainEx + }(ThreadUtils.sameThread) + val caught3 = intercept[SparkException] { + ThreadUtils.awaitResult(f3, 1.seconds, preserveSparkThrowable = true) + } + assert(caught3.getCause eq plainEx) + } + + test("awaitResult (JFuture) preserves SparkThrowable when flag is true") { + val sparkThrowableEx = new RuntimeException("structured error") with SparkThrowable { + override def getCondition: String = "TEST_ERROR_CLASS" + override def getMessageParameters: java.util.Map[String, String] = + java.util.Collections.emptyMap() + } + + // scalastyle:off sparkThreadPools + val jfuture = new java.util.concurrent.CompletableFuture[String]() + // scalastyle:on sparkThreadPools + jfuture.completeExceptionally(sparkThrowableEx) + + val caught = intercept[RuntimeException] { + ThreadUtils.awaitResult(jfuture, 10.seconds, preserveSparkThrowable = true) + } + assert(caught.isInstanceOf[SparkThrowable]) + assert(caught.asInstanceOf[SparkThrowable].getCondition == "TEST_ERROR_CLASS") + assert(caught.getSuppressed.nonEmpty) + } + + test("parmap preserves SparkThrowable when flag is true") { + val sparkThrowableEx = new RuntimeException("structured error") with SparkThrowable { + override def getCondition: String = "TEST_ERROR_CLASS" + override def getMessageParameters: java.util.Map[String, String] = + java.util.Collections.emptyMap() + } + + // With preserveSparkThrowable=true, the original SparkThrowable is re-thrown. + val caught1 = intercept[RuntimeException] { + ThreadUtils.parmap(Seq(1), "test", 1, preserveSparkThrowable = true) { _ => + throw sparkThrowableEx + } + } + assert(caught1.isInstanceOf[SparkThrowable]) + assert(caught1.asInstanceOf[SparkThrowable].getCondition == "TEST_ERROR_CLASS") + assert(caught1.getSuppressed.nonEmpty) + + // With preserveSparkThrowable=false, it is wrapped in SparkException. 
+ val caught2 = intercept[SparkException] { + ThreadUtils.parmap(Seq(1), "test", 1, preserveSparkThrowable = false) { _ => + throw sparkThrowableEx + } + } + assert(caught2.getCause.isInstanceOf[SparkThrowable]) + } } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index e87f3ad026491..4bb46959cef89 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -527,18 +527,24 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties { val scenario4 = new File(testDir, "scenario4") assert(testDir.canWrite) assert(testDir.setWritable(false)) - assert(!Utils.createDirectory(scenario4)) - assert(!scenario4.exists()) - assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario4")) + // Skip when write permission cannot actually be revoked (e.g., running as root). + if (!testDir.canWrite) { + assert(!Utils.createDirectory(scenario4)) + assert(!scenario4.exists()) + assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario4")) + } assert(testDir.setWritable(true)) // 5. The parent directory cannot execute val scenario5 = new File(testDir, "scenario5") assert(testDir.canExecute) assert(testDir.setExecutable(false)) - assert(!Utils.createDirectory(scenario5)) - assert(!scenario5.exists()) - assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario5")) + // Skip when execute permission cannot actually be revoked (e.g., running as root). 
+ if (!testDir.canExecute) { + assert(!Utils.createDirectory(scenario5)) + assert(!scenario5.exists()) + assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario5")) + } assert(testDir.setExecutable(true)) // The following 3 scenarios are only for the method: createDirectory(File) diff --git a/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala index 7551327d704b4..2767769924bc8 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala @@ -71,7 +71,6 @@ class SorterSuite extends SparkFunSuite { } test("java.lang.ArrayIndexOutOfBoundsException in TimSort") { - System.gc() // scalastyle:off val runLengths = Array(76405736, 74830360, 1181532, 787688, 1575376, 2363064, 3938440, 6301504, 1181532, 393844, 15753760, 1575376, 787688, 393844, 1969220, 3150752, 1181532,787688, 5513816, 3938440, @@ -140,7 +139,10 @@ class SorterSuite extends SparkFunSuite { 21, 20, 22, 18, 452, 114, 95, 18, 17, 21, 36, 18, 17, 115, 76, 144, 44, 38, 61,20, 19, 21, 17) // scalastyle:on val arrayToSortSize = 1091482190 - val arrayToSort = new Array[Byte](arrayToSortSize) + // Memory held by the previous test (e.g. the ~256 MB int array in "SPARK-5984 + // TimSort bug") may not be reclaimed before this >1 GB allocation, causing flaky + // OOM in CI. Force a GC and retry once on OOM. 
+ val arrayToSort = retryOnOOM(new Array[Byte](arrayToSortSize)) var sum: Int = -1 for (i <- runLengths) { sum += i diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml index 9925ae406dbd9..55b1ed40cb9ed 100644 --- a/dev/checkstyle-suppressions.xml +++ b/dev/checkstyle-suppressions.xml @@ -70,4 +70,6 @@ files="src/test/java/org/apache/spark/util/collection/TestTimSort.java" /> + diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh index 43c198301b702..9d93e49e014e4 100755 --- a/dev/create-release/release-tag.sh +++ b/dev/create-release/release-tag.sh @@ -84,7 +84,6 @@ fi # Set the release version in docs sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml -sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py git commit -a -m "Preparing Spark release $RELEASE_TAG" @@ -104,8 +103,6 @@ sed -i".tmp6" 's/__version__: str = .*$/__version__: str = "'"$R_NEXT_VERSION.de sed -i".tmp7" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$NEXT_VERSION"'/g' docs/_config.yml # Use R version for short version sed -i".tmp8" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$R_NEXT_VERSION"'/g' docs/_config.yml -# Update the version index of DocSearch as the short version -sed -i".tmp9" "s/'facetFilters':.*$/'facetFilters': [\"version:$R_NEXT_VERSION\"]/g" docs/_config.yml git commit -a -m "Preparing development version $NEXT_VERSION" diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index aaf9679e34f61..a8a9fd81768e2 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -194,35 +194,35 @@ metrics-jmx/4.2.37//metrics-jmx-4.2.37.jar 
metrics-json/4.2.37//metrics-json-4.2.37.jar metrics-jvm/4.2.37//metrics-jvm-4.2.37.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.2.12.Final//netty-all-4.2.12.Final.jar -netty-buffer/4.2.12.Final//netty-buffer-4.2.12.Final.jar -netty-codec-base/4.2.12.Final//netty-codec-base-4.2.12.Final.jar -netty-codec-compression/4.2.12.Final//netty-codec-compression-4.2.12.Final.jar -netty-codec-dns/4.2.12.Final//netty-codec-dns-4.2.12.Final.jar -netty-codec-http/4.2.12.Final//netty-codec-http-4.2.12.Final.jar -netty-codec-http2/4.2.12.Final//netty-codec-http2-4.2.12.Final.jar -netty-codec-socks/4.2.12.Final//netty-codec-socks-4.2.12.Final.jar -netty-codec/4.2.12.Final//netty-codec-4.2.12.Final.jar -netty-common/4.2.12.Final//netty-common-4.2.12.Final.jar -netty-handler-proxy/4.2.12.Final//netty-handler-proxy-4.2.12.Final.jar -netty-handler/4.2.12.Final//netty-handler-4.2.12.Final.jar -netty-resolver-dns/4.2.12.Final//netty-resolver-dns-4.2.12.Final.jar -netty-resolver/4.2.12.Final//netty-resolver-4.2.12.Final.jar +netty-all/4.2.13.Final//netty-all-4.2.13.Final.jar +netty-buffer/4.2.13.Final//netty-buffer-4.2.13.Final.jar +netty-codec-base/4.2.13.Final//netty-codec-base-4.2.13.Final.jar +netty-codec-compression/4.2.13.Final//netty-codec-compression-4.2.13.Final.jar +netty-codec-dns/4.2.13.Final//netty-codec-dns-4.2.13.Final.jar +netty-codec-http/4.2.13.Final//netty-codec-http-4.2.13.Final.jar +netty-codec-http2/4.2.13.Final//netty-codec-http2-4.2.13.Final.jar +netty-codec-socks/4.2.13.Final//netty-codec-socks-4.2.13.Final.jar +netty-codec/4.2.13.Final//netty-codec-4.2.13.Final.jar +netty-common/4.2.13.Final//netty-common-4.2.13.Final.jar +netty-handler-proxy/4.2.13.Final//netty-handler-proxy-4.2.13.Final.jar +netty-handler/4.2.13.Final//netty-handler-4.2.13.Final.jar +netty-resolver-dns/4.2.13.Final//netty-resolver-dns-4.2.13.Final.jar +netty-resolver/4.2.13.Final//netty-resolver-4.2.13.Final.jar 
netty-tcnative-boringssl-static/2.0.76.Final/linux-aarch_64/netty-tcnative-boringssl-static-2.0.76.Final-linux-aarch_64.jar netty-tcnative-boringssl-static/2.0.76.Final/linux-x86_64/netty-tcnative-boringssl-static-2.0.76.Final-linux-x86_64.jar netty-tcnative-boringssl-static/2.0.76.Final/osx-aarch_64/netty-tcnative-boringssl-static-2.0.76.Final-osx-aarch_64.jar netty-tcnative-boringssl-static/2.0.76.Final/osx-x86_64/netty-tcnative-boringssl-static-2.0.76.Final-osx-x86_64.jar netty-tcnative-boringssl-static/2.0.76.Final/windows-x86_64/netty-tcnative-boringssl-static-2.0.76.Final-windows-x86_64.jar netty-tcnative-classes/2.0.76.Final//netty-tcnative-classes-2.0.76.Final.jar -netty-transport-classes-epoll/4.2.12.Final//netty-transport-classes-epoll-4.2.12.Final.jar -netty-transport-classes-kqueue/4.2.12.Final//netty-transport-classes-kqueue-4.2.12.Final.jar -netty-transport-native-epoll/4.2.12.Final/linux-aarch_64/netty-transport-native-epoll-4.2.12.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.2.12.Final/linux-riscv64/netty-transport-native-epoll-4.2.12.Final-linux-riscv64.jar -netty-transport-native-epoll/4.2.12.Final/linux-x86_64/netty-transport-native-epoll-4.2.12.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.2.12.Final/osx-aarch_64/netty-transport-native-kqueue-4.2.12.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.2.12.Final/osx-x86_64/netty-transport-native-kqueue-4.2.12.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.2.12.Final//netty-transport-native-unix-common-4.2.12.Final.jar -netty-transport/4.2.12.Final//netty-transport-4.2.12.Final.jar +netty-transport-classes-epoll/4.2.13.Final//netty-transport-classes-epoll-4.2.13.Final.jar +netty-transport-classes-kqueue/4.2.13.Final//netty-transport-classes-kqueue-4.2.13.Final.jar +netty-transport-native-epoll/4.2.13.Final/linux-aarch_64/netty-transport-native-epoll-4.2.13.Final-linux-aarch_64.jar 
+netty-transport-native-epoll/4.2.13.Final/linux-riscv64/netty-transport-native-epoll-4.2.13.Final-linux-riscv64.jar +netty-transport-native-epoll/4.2.13.Final/linux-x86_64/netty-transport-native-epoll-4.2.13.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.2.13.Final/osx-aarch_64/netty-transport-native-kqueue-4.2.13.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.2.13.Final/osx-x86_64/netty-transport-native-kqueue-4.2.13.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.2.13.Final//netty-transport-native-unix-common-4.2.13.Final.jar +netty-transport/4.2.13.Final//netty-transport-4.2.13.Final.jar objenesis/3.5//objenesis-3.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.17.6//okio-1.17.6.jar diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 1cfc22acc2302..57cde202dde8d 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -70,6 +70,7 @@ RUN apt-get update && apt-get install -y \ software-properties-common \ wget \ zlib1g-dev \ + zstd \ && rm -rf /var/lib/apt/lists/* @@ -108,7 +109,7 @@ RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y \ python3.9 python3.9-distutils \ && rm -rf /var/lib/apt/lists/* -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 +RUN curl -sS https://bootstrap.pypa.io/pip/3.9/get-pip.py | python3.9 RUN python3.9 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ python3.9 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index a780e173adb7b..27d7728099bbc 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -32,6 +32,21 @@ set -x SPARK_HOME="$(cd "`dirname "$0"`/.."; pwd)" DISTDIR="$SPARK_HOME/dist" +# The Apache LICENSE and NOTICE are copied into the Python and R package +# directories below so they are bundled into 
the source distributions. Remove +# them on exit so a failed build does not leave stray files behind. +function cleanup_dist_license_files { + rm -f "$SPARK_HOME/python/LICENSE" "$SPARK_HOME/python/NOTICE" \ + "$SPARK_HOME/R/pkg/LICENSE" "$SPARK_HOME/R/pkg/NOTICE" + # Restore the SparkR DESCRIPTION if a release build patched it in place (see + # the R packaging section). Guards against an interrupted build leaving the + # tracked DESCRIPTION modified. + if [ -f "$SPARK_HOME/R/DESCRIPTION.orig" ]; then + mv -f "$SPARK_HOME/R/DESCRIPTION.orig" "$SPARK_HOME/R/pkg/DESCRIPTION" + fi +} +trap cleanup_dist_license_files EXIT + MAKE_TGZ=false MAKE_PIP=false MAKE_R=false @@ -169,7 +184,6 @@ fi cd "$SPARK_HOME" if [ "$SBT_ENABLED" == "true" ] ; then - export NOLINT_ON_COMPILE=1 # Store the command as an array because $SBT variable might have spaces in it. # Normal quoting tricks don't work. # See: http://mywiki.wooledge.org/BashFAQ/050 @@ -204,6 +218,11 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" # Copy jars cp -r "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" +# SPARK-53327: Use the modified ResourceImpl.class in spark-catalyst which is compatible with Java 25 +if [ -f "$DISTDIR"/jars/datasketches-memory-3.0.2.jar ]; then + zip -d "$DISTDIR"/jars/datasketches-memory-3.0.2.jar org/apache/datasketches/memory/internal/ResourceImpl.class +fi + # Only create the yarn directory if the yarn artifacts were built. if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then mkdir "$DISTDIR/yarn" @@ -255,9 +274,39 @@ if [ "$MAKE_PIP" == "true" ]; then pushd "$SPARK_HOME/python" > /dev/null # Delete the egg info file if it exists, this can cache older setup files. rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" + # Ship the Apache LICENSE and NOTICE inside the PySpark source distributions + # (see MANIFEST.in). These are removed again after the sdists are built. 
+ # + # The classic pyspark sdist bundles the assembly jars (packaging/classic/setup.py + # builds a deps/jars symlink farm), so it ships the binary LICENSE/NOTICE that + # enumerate the bundled third-party jars' licenses, mirroring the binary + # distribution above. The connect and client sdists bundle no jars and ship the + # plain source LICENSE/NOTICE. + if [ -e "$SPARK_HOME/LICENSE-binary" ]; then + cp "$SPARK_HOME/LICENSE-binary" LICENSE + cp "$SPARK_HOME/NOTICE-binary" NOTICE + else + cp "$SPARK_HOME/LICENSE" LICENSE + cp "$SPARK_HOME/NOTICE" NOTICE + fi python3 packaging/classic/setup.py sdist + + cp "$SPARK_HOME/LICENSE" LICENSE + cp "$SPARK_HOME/NOTICE" NOTICE python3 packaging/connect/setup.py sdist python3 packaging/client/setup.py sdist + rm -f LICENSE NOTICE + + # Guard against regressions: every PySpark sdist must contain LICENSE and NOTICE + # at the package root. The missing files were only caught by a Spark 4.2.0 RC1 + # vote -1 (SPARK-57393); fail the release build here instead of at vote time. + for f in dist/pyspark*.tar.gz; do + listing=$(tar tzf "$f") + for required in LICENSE NOTICE; do + grep -qE "^[^/]+/$required\$" <<< "$listing" || \ + { echo "ERROR: $f is missing $required at the package root"; exit 1; } + done + done popd > /dev/null else echo "Skipping building python distribution package" @@ -268,9 +317,33 @@ if [ "$MAKE_R" == "true" ]; then echo "Building R source package" R_PACKAGE_VERSION=`grep Version "$SPARK_HOME/R/pkg/DESCRIPTION" | awk '{print $NF}'` pushd "$SPARK_HOME/R" > /dev/null + # Ship the Apache LICENSE and NOTICE inside the SparkR source package. These + # are removed again after the package is built. + cp "$SPARK_HOME/LICENSE" pkg/LICENSE + cp "$SPARK_HOME/NOTICE" pkg/NOTICE + # Reference the bundled LICENSE from DESCRIPTION so `R CMD check --as-cran` does + # not emit "File LICENSE is not mentioned in the DESCRIPTION file". 
The committed + # DESCRIPTION is left untouched because SparkR CI runs check-cran.sh without the + # LICENSE file present; this edit is transient and restored after the build (and + # by the EXIT trap on failure). The backup lives outside pkg/ so R CMD check does + # not flag it as a non-standard file. NOTE: the "Non-standard file 'NOTICE'" note + # cannot be silenced this way and is expected. + cp pkg/DESCRIPTION "$SPARK_HOME/R/DESCRIPTION.orig" + sed 's/^License: Apache License (== 2.0)$/License: Apache License (== 2.0) + file LICENSE/' \ + "$SPARK_HOME/R/DESCRIPTION.orig" > pkg/DESCRIPTION # Build source package and run full checks # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME NO_TESTS=1 "$SPARK_HOME/R/check-cran.sh" + mv -f "$SPARK_HOME/R/DESCRIPTION.orig" pkg/DESCRIPTION + rm -f pkg/LICENSE pkg/NOTICE + + # Guard against regressions: the SparkR source package must contain LICENSE and + # NOTICE at the package root (SPARK-57393). + listing=$(tar tzf "SparkR_$R_PACKAGE_VERSION.tar.gz") + for required in LICENSE NOTICE; do + grep -qE "^[^/]+/$required\$" <<< "$listing" || \ + { echo "ERROR: SparkR source package is missing $required"; exit 1; } + done # Move R source package to match the Spark release version if the versions are not the same. 
# NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file diff --git a/dev/run-tests.py b/dev/run-tests.py index 0b7a90694385f..685621193dd66 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -644,7 +644,8 @@ def main(): run_build_tests() # spark build - build_apache_spark(build_tool, extra_profiles) + if os.environ.get("SKIP_SCALA_BUILD", "false") != "true": + build_apache_spark(build_tool, extra_profiles) # backwards compatibility checks if build_tool == "sbt": @@ -653,7 +654,8 @@ def main(): detect_binary_inop_with_mima(extra_profiles) # Since we did not build assembly/package before running dev/mima, we need to # do it here because the tests still rely on it; see SPARK-13294 for details. - build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks) + if os.environ.get("SKIP_SCALA_BUILD", "false") != "true": + build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks) # run the test suites run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags) diff --git a/dev/scalastyle b/dev/scalastyle index 0428453b62c81..09e6c2372614d 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -30,6 +30,62 @@ ERRORS=$(echo -e "q\n" \ if test ! -z "$ERRORS"; then echo -e "Scalastyle checks failed at following occurrences:\n$ERRORS" + # When running under GitHub Actions, also emit each scalastyle violation as + # a workflow `::error` annotation so it appears inline on the PR's "Files + # changed" tab. Without this, a violation cascades into ~7 red CI checks + # (Linters, Java 17/25 Maven build, Documentation generation, sparkr, + # Docker integration, TPC-DS) -- all needing catalyst to compile -- and + # each only surfaces a generic "exit code 1" with no file/line, forcing + # the user to download a full job log to find the actual violation. + if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then + # Strip ANSI color codes from the captured output before regex + # matching. 
Today sbt under awk's pipe is not a TTY and skips color, + # so the input is already plain. But if sbt color is ever forced + # (`-Dsbt.color=always`, custom CI shell), `\e[31m` would silently + # break every regex below. Cheap to harden. + ERRORS_PLAIN=$(printf '%s' "$ERRORS" | sed -E $'s/\x1b\\[[0-9;]*[A-Za-z]//g') + # Helper: emit one `::error` annotation. Centralised so the two regex + # branches below stay short. + emit_annotation() { + local file="$1" lineno="$2" msg="$3" + # Strip the GitHub Actions workspace prefix so the annotation + # references the path as it appears in the repo. + local file_rel="${file#${GITHUB_WORKSPACE:-}/}" + # Escape the few characters GitHub reserves in annotation values: + # %, \r, \n. (`,` and `:` need not be escaped in the message body, + # only inside parameter values, which we don't use.) + local msg_escaped="${msg//%/%25}" + msg_escaped="${msg_escaped//$'\r'/%0D}" + msg_escaped="${msg_escaped//$'\n'/%0A}" + printf '::error file=%s,line=%s,title=Scalastyle::%s\n' \ + "$file_rel" "$lineno" "$msg_escaped" + } + printf '%s\n' "$ERRORS_PLAIN" | while IFS= read -r raw; do + # Two scalastyle output formats reach us: + # + # (a) scalastyle's native console writer (`Tasks.doScalastyle` when + # invoked by the explicit `scalastyle` / `test:scalastyle` + # tasks): + # error file=<path> message=<msg> line=<n> [column=<n>] + # The path has no spaces, the message can; `column=<n>` is + # appended for checkers that report a column (e.g. + # `WhitespaceEndOfLineChecker`) and absent otherwise. + # + # (b) sbt's logger format, used when `Tasks.doScalastyle` writes + # through `streams.value.log.error(...)` -- which is what the + # explicit `scalastyle` / `test:scalastyle` tasks invoked by + # this script do, and so this is the format we see in CI: + # [error] <path>:<line>: <msg> + # The leading `[error] ` plus a single `:<line>:` (with no + # `:<col>:` follow-up) is what tells it apart from a regular + # Scala compile error of shape `[error] <path>:<line>:<col>: <msg>`. 
+ if [[ "$raw" =~ ^error[[:space:]]+file=([^[:space:]]+)[[:space:]]+message=(.*)[[:space:]]+line=([0-9]+)([[:space:]]+column=[0-9]+)?$ ]]; then + emit_annotation "${BASH_REMATCH[1]}" "${BASH_REMATCH[3]}" "${BASH_REMATCH[2]}" + elif [[ "$raw" =~ ^\[error\][[:space:]]+(/[^:[:space:]]+):([0-9]+):[[:space:]]+(.+)$ ]]; then + emit_annotation "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}" + fi + done + fi exit 1 else echo -e "Scalastyle checks passed." diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile index 3b02e2ae1cffe..f58168627dd12 100644 --- a/dev/spark-test-image/docs/Dockerfile +++ b/dev/spark-test-image/docs/Dockerfile @@ -70,6 +70,7 @@ RUN apt-get update && apt-get install -y \ software-properties-common \ wget \ zlib1g-dev \ + zstd \ && apt-get autoremove --purge -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile index 14a0e4fcfe75b..b7cecae654cb9 100644 --- a/dev/spark-test-image/lint/Dockerfile +++ b/dev/spark-test-image/lint/Dockerfile @@ -63,6 +63,7 @@ RUN apt-get update && apt-get install -y \ software-properties-common \ wget \ zlib1g-dev \ + zstd \ && apt-get autoremove --purge -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -95,8 +96,8 @@ RUN python3.12 -m pip install \ 'mypy==1.19.1' \ 'numpy==2.4.1' \ 'numpydoc' \ - 'pandas' \ - 'pandas-stubs' \ + 'pandas==2.3.3' \ + 'pandas-stubs==2.3.3.260113' \ 'plotly>=4.8' \ 'pyarrow>=23.0.0' \ 'pytest-mypy-plugins==1.9.3' \ diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile index 8e5044aeb954e..e39e9fac70d22 100644 --- a/dev/spark-test-image/python-311/Dockerfile +++ b/dev/spark-test-image/python-311/Dockerfile @@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \ pkg-config \ tzdata \ software-properties-common \ - zlib1g-dev + zlib1g-dev \ + zstd # Install Python 3.11 RUN add-apt-repository 
ppa:deadsnakes/ppa diff --git a/dev/spark-test-image/python-312-classic-only/Dockerfile b/dev/spark-test-image/python-312-classic-only/Dockerfile index da2b99cd7838d..ceb4694b2dc9d 100644 --- a/dev/spark-test-image/python-312-classic-only/Dockerfile +++ b/dev/spark-test-image/python-312-classic-only/Dockerfile @@ -49,6 +49,7 @@ RUN apt-get update && apt-get install -y \ tzdata \ software-properties-common \ zlib1g-dev \ + zstd \ && apt-get autoremove --purge -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/dev/spark-test-image/python-312-pandas-3/Dockerfile b/dev/spark-test-image/python-312-pandas-3/Dockerfile index c54a8c284a6b9..e2a2c189df15b 100644 --- a/dev/spark-test-image/python-312-pandas-3/Dockerfile +++ b/dev/spark-test-image/python-312-pandas-3/Dockerfile @@ -52,6 +52,7 @@ RUN apt-get update && apt-get install -y \ tzdata \ software-properties-common \ zlib1g-dev \ + zstd \ && apt-get autoremove --purge -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile index 8eee9e414f1d8..72b40510b8db5 100644 --- a/dev/spark-test-image/python-312/Dockerfile +++ b/dev/spark-test-image/python-312/Dockerfile @@ -48,6 +48,7 @@ RUN apt-get update && apt-get install -y \ pkg-config \ tzdata \ software-properties-common \ + zstd \ zlib1g-dev \ && apt-get autoremove --purge -y \ && apt-get clean \ diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile index 6cfdd2d5a86ed..c13e364f15897 100644 --- a/dev/spark-test-image/python-313/Dockerfile +++ b/dev/spark-test-image/python-313/Dockerfile @@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \ pkg-config \ tzdata \ software-properties-common \ - zlib1g-dev + zlib1g-dev \ + zstd # Install Python 3.13 RUN add-apt-repository ppa:deadsnakes/ppa diff --git a/dev/spark-test-image/python-314-nogil/Dockerfile b/dev/spark-test-image/python-314-nogil/Dockerfile 
index edfea31729928..6dea9c2fc35ed 100644 --- a/dev/spark-test-image/python-314-nogil/Dockerfile +++ b/dev/spark-test-image/python-314-nogil/Dockerfile @@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \ pkg-config \ tzdata \ software-properties-common \ - zlib1g-dev + zlib1g-dev \ + zstd # Install Python 3.14 (no GIL) RUN add-apt-repository ppa:deadsnakes/ppa diff --git a/dev/spark-test-image/python-314/Dockerfile b/dev/spark-test-image/python-314/Dockerfile index b6bc76c6b93b4..2f3570ec06876 100644 --- a/dev/spark-test-image/python-314/Dockerfile +++ b/dev/spark-test-image/python-314/Dockerfile @@ -46,7 +46,8 @@ RUN apt-get update && apt-get install -y \ pkg-config \ tzdata \ software-properties-common \ - zlib1g-dev + zlib1g-dev \ + zstd # Install Python 3.14 RUN add-apt-repository ppa:deadsnakes/ppa diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile index 72abd5f5811b9..d2e4a83ce81e1 100644 --- a/dev/spark-test-image/python-minimum/Dockerfile +++ b/dev/spark-test-image/python-minimum/Dockerfile @@ -47,7 +47,8 @@ RUN apt-get update && apt-get install -y \ pkg-config \ tzdata \ software-properties-common \ - zlib1g-dev + zlib1g-dev \ + zstd # Install Python 3.10 RUN add-apt-repository ppa:deadsnakes/ppa diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile b/dev/spark-test-image/python-ps-minimum/Dockerfile deleted file mode 100644 index 0f970202edd52..0000000000000 --- a/dev/spark-test-image/python-ps-minimum/Dockerfile +++ /dev/null @@ -1,70 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Image for building and testing Spark branches. Based on Ubuntu 24.04. -# See also in https://hub.docker.com/_/ubuntu -FROM ubuntu:noble -LABEL org.opencontainers.image.authors="Apache Spark project " -LABEL org.opencontainers.image.licenses="Apache-2.0" -LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For Pandas API on Spark with old dependencies" -# Overwrite this label to avoid exposing the underlying Ubuntu OS version label -LABEL org.opencontainers.image.version="" - -ENV FULL_REFRESH_DATE=20260210 - -ENV DEBIAN_FRONTEND=noninteractive -ENV DEBCONF_NONINTERACTIVE_SEEN=true - -RUN printf 'Types: deb\nURIs: https://mirrors.edge.kernel.org/ubuntu\nSuites: noble noble-updates noble-security\nComponents: main restricted universe multiverse\nSigned-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg\n' > /etc/apt/sources.list.d/mirror.sources - -# Should keep the installation consistent with https://apache.github.io/spark/api/python/getting_started/install.html -RUN apt-get update && apt-get install -y \ - build-essential \ - ca-certificates \ - curl \ - gfortran \ - git \ - gnupg \ - libgit2-dev \ - liblapack-dev \ - libopenblas-dev \ - libssl-dev \ - openjdk-17-jdk-headless \ - pkg-config \ - tzdata \ - software-properties-common \ - zlib1g-dev - -# Install Python 3.10 -RUN add-apt-repository ppa:deadsnakes/ppa -RUN apt-get update && apt-get install -y \ - python3.10 \ - python3.10-venv \ - && apt-get autoremove --purge -y \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Setup virtual environment -ENV 
VIRTUAL_ENV=/opt/spark-venv -RUN python3.10 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - -ARG BASIC_PIP_PKGS="pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting psutil" -ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf==6.33.5" - -RUN python3.10 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \ - python3.10 -m pip cache purge diff --git a/dev/spark-test-image/sparkr/Dockerfile b/dev/spark-test-image/sparkr/Dockerfile index 07816add74fca..8a03095aef7ee 100644 --- a/dev/spark-test-image/sparkr/Dockerfile +++ b/dev/spark-test-image/sparkr/Dockerfile @@ -62,6 +62,7 @@ RUN apt-get update && apt-get install -y \ software-properties-common \ wget \ zlib1g-dev \ + zstd \ && rm -rf /var/lib/apt/lists/* RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' >> /etc/apt/sources.list diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index bd7d1f55aaee5..c26529951cc78 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -449,7 +449,7 @@ def __hash__(self): pipelines = Module( name="pipelines", - dependencies=[], + dependencies=[sql], source_file_regexes=["sql/pipelines"], sbt_test_goals=[ "pipelines/test", @@ -611,6 +611,7 @@ def __hash__(self): "pyspark.sql.tests.test_readwriter", "pyspark.sql.tests.test_serde", "pyspark.sql.tests.test_session", + "pyspark.sql.tests.test_nearest_by_join", "pyspark.sql.tests.test_subquery", "pyspark.sql.tests.test_types", "pyspark.sql.tests.test_geographytype", @@ -1173,6 +1174,7 @@ def __hash__(self): "pyspark.sql.tests.connect.test_parity_observation", "pyspark.sql.tests.connect.test_parity_repartition", "pyspark.sql.tests.connect.test_parity_stat", + "pyspark.sql.tests.connect.test_parity_nearest_by_join", "pyspark.sql.tests.connect.test_parity_subquery", "pyspark.sql.tests.connect.test_parity_types", 
"pyspark.sql.tests.connect.test_parity_column", @@ -1187,7 +1189,6 @@ def __hash__(self): "pyspark.sql.tests.connect.test_parity_python_datasource", "pyspark.sql.tests.connect.test_parity_frame_plot", "pyspark.sql.tests.connect.test_parity_frame_plot_plotly", - "pyspark.sql.tests.connect.test_session", "pyspark.sql.tests.connect.test_utils", "pyspark.sql.tests.connect.client.test_artifact", "pyspark.sql.tests.connect.client.test_artifact_localcluster", @@ -1195,7 +1196,7 @@ def __hash__(self): "pyspark.sql.tests.connect.client.test_client_call_stack_trace", "pyspark.sql.tests.connect.client.test_client_retries", "pyspark.sql.tests.connect.client.test_reattach", - "pyspark.sql.tests.connect.test_resources", + "pyspark.sql.tests.connect.test_parity_resources", "pyspark.sql.tests.connect.shell.test_progress", "pyspark.sql.tests.connect.test_df_debug", "pyspark.sql.tests.connect.arrow.test_parity_arrow", @@ -1672,6 +1673,40 @@ def __hash__(self): test_tags=["org.apache.spark.tags.DockerTest"], ) + +# dev_tools is a pseudo module that contains all the dev related files that +# won't impact the CI build and tests (except for CI which is forced to +# run anyway). +# This module is created so modifying files in this module won't trigger any +# tests to run. +dev_tools = Module( + name="dev-tools", + dependencies=[], + source_file_regexes=[ + ".*README.md", + ".*AGENTS.md", + r".*\.gitignore", + "CONTRIBUTING.md", + ".asf.yaml", + "SECURITY.md", + "NOTICE-binary", + "LICENSE-binary", + "ui-test/package.json", + "ui-test/package-lock.json", + "scalastyle-config.xml", + "dev/checkstyle.xml", + "dev/checkstyle-suppressions.xml", + "dev/spark-test-image/lint/Dockerfile", + "dev/lint-python", + "dev/lint-scala", + "dev/reformat-python", + "dev/structured_logging_style.py", + "dev/merge_spark_pr.py", + "dev/create_spark_jira.py", + "dev/create-release/", + ], +) + # The root module is a dummy module which is used to run all of the tests. 
# No other modules should directly depend on this module. root = Module( diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py index a66c0e70cb567..fff418bdb3489 100755 --- a/dev/sparktestsupport/utils.py +++ b/dev/sparktestsupport/utils.py @@ -33,26 +33,15 @@ def determine_modules_for_files(filenames): """ Given a list of filenames, return the set of modules that contain those files. If a file is not associated with a more specific submodule, then this method will consider that - file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions, - and `README.md` is always ignored. + file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions. >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"])) ['pyspark-core', 'pyspark-errors', 'sql'] >>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])] ['root'] - >>> [x.name for x in determine_modules_for_files(["sql/README.md"])] - [] """ changed_modules = set() for filename in filenames: - if filename.endswith("README.md"): - continue - if filename in ( - "scalastyle-config.xml", - "dev/checkstyle.xml", - "dev/checkstyle-suppressions.xml", - ): - continue if ("GITHUB_ACTIONS" not in os.environ) and filename.startswith(".github"): continue matched_at_least_one_module = False @@ -115,8 +104,8 @@ def determine_modules_to_test(changed_modules, deduplicated=True): >>> sorted([x.name for x in determine_modules_to_test([modules.sql])]) ... 
# doctest: +NORMALIZE_WHITESPACE ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', - 'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect', 'pyspark-mllib', - 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', + 'mllib', 'pipelines', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect', + 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-pipelines', 'pyspark-sql', 'pyspark-structured-streaming', 'pyspark-structured-streaming-connect', 'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] @@ -124,8 +113,8 @@ def determine_modules_to_test(changed_modules, deduplicated=True): ... [modules.sparkr, modules.sql], deduplicated=False)]) ... # doctest: +NORMALIZE_WHITESPACE ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', - 'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect', 'pyspark-mllib', - 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', + 'mllib', 'pipelines', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-ml-connect', + 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-pipelines', 'pyspark-sql', 'pyspark-structured-streaming', 'pyspark-structured-streaming-connect', 'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] @@ -133,9 +122,9 @@ def determine_modules_to_test(changed_modules, deduplicated=True): ... [modules.sql, modules.core], deduplicated=False)]) ... 
# doctest: +NORMALIZE_WHITESPACE ['avro', 'catalyst', 'connect', 'core', 'docker-integration-tests', 'examples', 'graphx', - 'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'protobuf', 'pyspark-connect', - 'pyspark-core', 'pyspark-install', 'pyspark-ml', 'pyspark-ml-connect', 'pyspark-mllib', - 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', + 'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'pipelines', 'protobuf', + 'pyspark-connect', 'pyspark-core', 'pyspark-install', 'pyspark-ml', 'pyspark-ml-connect', + 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-pipelines', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'pyspark-structured-streaming', 'pyspark-structured-streaming-connect', 'pyspark-testing', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', diff --git a/docs/_config.yml b/docs/_config.yml index db0de55f3b519..5109d8d338a78 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,18 +19,16 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala. -SPARK_VERSION: 4.2.0.1-4.3.0-0 -SPARK_VERSION_SHORT: 4.2.0.1-4.3.0-0 +SPARK_VERSION: 4.2.0.1-4.3.0-1 +SPARK_VERSION_SHORT: 4.2.0.1-4.3.0-1 SCALA_BINARY_VERSION: "2.13" SCALA_VERSION: "2.13.18" SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark -# Before a new release, we should: -# 1. update the `version` array for the new Spark documentation -# on https://github.com/algolia/docsearch-configs/blob/master/configs/apache_spark.json. -# 2. update the value of `facetFilters.version` in `algoliaOptions` on the new release branch. -# Otherwise, after release, the search results are always based on the latest documentation -# (https://spark.apache.org/docs/latest/) even when visiting the documentation of previous releases. 
+# The DocSearch index is maintained by the Algolia crawler at https://crawler.algolia.com/. +# The crawler indexes only https://spark.apache.org/docs/latest/ and tags every page with +# `version:latest`. All release branches share this single index, so `facetFilters` stays +# pinned to `version:latest` everywhere and no per-release update is required. DOCSEARCH_SCRIPT: | docsearch({ apiKey: 'd62f962a82bc9abb53471cb7b89da35e', @@ -39,7 +37,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:4.2.0.1-4.3.0-0"] + 'facetFilters': ["version:4.2.0.1-4.3.0-1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb index e6719c4bed7e3..429cef5aa026c 100644 --- a/docs/_plugins/build_api_docs.rb +++ b/docs/_plugins/build_api_docs.rb @@ -45,13 +45,11 @@ def build_spark_if_necessary print_header "Building Spark." cd(SPARK_PROJECT_ROOT) - # Maven may leave POM-only org.hamcrest:hamcrest-core trees under ~/.m2; SBT/Coursier then - # fails with "file:.../hamcrest-core-*.jar: not found". Clear before invoking SBT. - hamcrest_m2 = File.join(Dir.home, '.m2/repository/org/hamcrest/hamcrest-core') - FileUtils.rm_rf(hamcrest_m2) command = "NO_PROVIDED_SPARK_JARS=0 build/sbt -Phive -Pkinesis-asl clean package" puts "Running '#{command}'; this may take a few minutes..." system(command) || raise("Failed to build Spark") + # SPARK-53327: Use the modified ResourceImpl.class in spark-catalyst which is compatible with Java 25 + system("zip -d assembly/target/scala-2.13/jars/datasketches-memory-3.0.2.jar org/apache/datasketches/memory/internal/ResourceImpl.class") $spark_package_is_built = true end @@ -133,101 +131,147 @@ def build_spark_scala_and_java_docs_if_necessary command = "build/sbt -Pkinesis-asl unidoc" puts "Running '#{command}'..." - # Tee sbt output to a log file so we can diagnose failures. 
The most common - # unidoc failure is a javadoc crash mid-stream while generating HTML for a - # specific class, buried under ~100 benign errors on genjavadoc-generated - # Java stubs (e.g. target/java/org/apache/spark/ErrorInfo.java). Without the - # diagnostic below, the real culprit -- the source whose doc tripped javadoc - # -- is effectively invisible in the CI log. - log_file = File.join(SPARK_PROJECT_ROOT, "target", "unidoc-build.log") - mkdir_p(File.dirname(log_file)) - success = stream_and_capture(command, log_file) - unless success - diagnose_unidoc_failure(log_file) - raise("Unidoc generation failed") - end -end -# Runs `command`, streaming every line to both stdout and `log_file`. Returns -# true iff the command exited 0. Ruby-only; no shell pipefail reliance. -def stream_and_capture(command, log_file) - File.open(log_file, 'w') do |f| - IO.popen("#{command} 2>&1", 'r') do |pipe| - pipe.each_line do |line| + # Two filter passes on the unidoc output, plus an additive fatal-error summary: + # + # 1. Genjavadoc-stub diagnostic blocks (~28 `[error]` lines on stubs under + # `target/java/`, plus 3-5 continuation lines each). Inert because + # `--ignore-source-errors` is set; matched by message text so legitimate + # doclint diagnostics on stub paths still pass through. + # + # 2. `-verbose` progress lines (~13K total): `Loading source file ...`, + # `[parsing started/completed ...]`, `[loading /path/X.class]`, + # `Generating .../X.html`. These are dominant in the log when `-verbose` + # is set (which it is in `JavaUnidoc / unidoc / javacOptions` to surface + # per-file `error: reference not found` diagnostics) but carry no signal + # of their own. Suppressing them brings the visible log from ~17K to ~5K + # lines on a typical run while leaving every diagnostic untouched. + # + # 3. Fatal-error summary (additive, drops no log lines). 
The filtered log is + # still ~4K lines and most `error:` text in it is non-fatal source-loading + # chatter, so the build-failing diagnostics are hard to spot. After the + # pipe closes, we print a `Fatal javadoc errors (N): ...` block and emit + # `::error file=,line=::` GitHub Actions annotations so they surface in the + # PR check panel. Captured strictly within the Standard Doclet phase + # bracketed by `Building tree for all the packages and classes...` and + # `Building index for all classes...`, which is where doclint diagnostics + # are emitted -- this matches what javadoc counts toward exit code 1. + # Self-checked against javadoc's own `N errors` summary line; a mismatch + # emits a `::warning::` so future phase-marker drift is visible. + ansi = /\e\[[0-9;]*[A-Za-z]/ + stub_header = %r{ + \[(?:error|warn)\]\s+ + \S*?/target/java/\S+\.java:\d+(?::\d+)?:\s+ + error:\s+ + (?:cannot\s+find\s+symbol + |illegal\s+combination\s+of\s+modifiers + |non-static\s+type\s+variable\b + |.*?\s+is\s+not\s+public\s+in\s+\S+;\s+cannot\s+be\s+accessed\s+from\s+outside\s+package) + }x + stub_cont = %r{\A\s*\[(?:error|warn)\]\s+(?!/\S+\.java:\d+(?::\d+)?:\s)} + verbose_line = %r{ + \[(?:error|warn)\]\s+ + (?:Loading\s+source\s+file\s + |\[parsing\s+(?:started|completed)\s + |\[loading\s + |\[checking\s + |\[wrote\s + |Generating\s+\S+\.html + ) + }x + + # Doclint phase tracking for the trailing summary. Standard Doclet bookends the + # phase that produces build-failing diagnostics with these marker lines; any + # `error:` outside this window is source-loading noise that does not contribute + # to javadoc's exit code. The summary below captures only the fatal ones and + # re-emits them as GitHub Actions annotations so they surface in the PR check + # panel instead of being buried in a 4K-line log. 
+ doclint_start = %r{\bBuilding\s+tree\s+for\s+all\s+the\s+packages\s+and\s+classes\b} + doclint_end = %r{\bBuilding\s+index\s+for\s+all\s+classes\b} + doclint_diag = %r{\A\[warn\]\s+(?<path>\S+):(?<lineno>\d+)(?::\d+)?:\s+error:\s+(?<msg>.+?)\s*\z} + doclint_cont = %r{\A\[warn\]\s(?!\S+:\d+(?::\d+)?:\s+error:)(?<content>.*?)\s*\z} + doclint_summary = %r{\A\[warn\]\s+(?<count>[\d,]+)\s+errors?\s*\z} + + in_stub = false + in_doclint = false + fatal_diagnostics = [] + pending_context_lines = 0 # snippet + caret lines that follow each diag header + reported_error_count = nil + + IO.popen("#{command} 2>&1", 'r') do |pipe| + pipe.each_line do |line| + plain = line.gsub(ansi, '') + + if plain =~ doclint_start + in_doclint = true + elsif in_doclint && plain =~ doclint_end + in_doclint = false + pending_context_lines = 0 + end + + if in_doclint && (m = plain.match(doclint_diag)) + fatal_diagnostics << { + path: m[:path], line: m[:lineno], msg: m[:msg], context: [] + } + pending_context_lines = 2 + elsif in_doclint && pending_context_lines > 0 && + (m = plain.match(doclint_cont)) && !fatal_diagnostics.empty? + fatal_diagnostics.last[:context] << m[:content] + pending_context_lines -= 1 + end + + if reported_error_count.nil? && (m = plain.match(doclint_summary)) + reported_error_count = m[:count].delete(',').to_i + end + + if plain =~ verbose_line + in_stub = false + # suppress -verbose progress line + elsif plain =~ stub_header + in_stub = true + elsif in_stub && plain =~ stub_cont + # continuation of a stub block; suppress + else + in_stub = false $stdout.write(line) $stdout.flush - f.write(line) end end end - $?.success? -end -# Scans the captured unidoc log and prints a pointer to the most likely -# culprit source file. The heuristic: when javadoc dies mid-HTML-generation, -# the last "Generating .../X.html" line before "javadoc exited with exit code" -# names the class that tripped it. Prints nothing actionable if the failure -# mode doesn't match (e.g. 
a scaladoc error), in which case the full log above -# already shows what's wrong. -def diagnose_unidoc_failure(log_file) - return unless File.exist?(log_file) - begin - lines = File.readlines(log_file) - - javadoc_exit_idx = lines.rindex { |l| l.include?("javadoc exited with exit code") } - last_generating = nil - if javadoc_exit_idx - # Strip ANSI color codes so the regex matches sbt-coloured output too. - ansi = /\e\[[0-9;]*[A-Za-z]/ - lines[0...javadoc_exit_idx].reverse_each do |line| - if line.gsub(ansi, '') =~ %r{Generating .+/javaunidoc/(\S+?\.html)\.\.\.} - last_generating = $1 - break - end - end + unless fatal_diagnostics.empty? + bar = "=" * 72 + puts "" + puts bar + puts "Fatal javadoc errors (#{fatal_diagnostics.size}):" + puts bar + fatal_diagnostics.each_with_index do |d, i| + puts " #{i + 1}. #{d[:path]}:#{d[:line]}: #{d[:msg]}" + d[:context].each { |c| puts " #{c}" } end - - banner = "=" * 78 - $stderr.puts "" - $stderr.puts banner - $stderr.puts "Unidoc failed -- diagnostic summary" - $stderr.puts banner - if last_generating - class_path = last_generating.sub(/\.html$/, '') - class_name = class_path.tr('/', '.') - $stderr.puts "" - $stderr.puts " Javadoc crashed while generating: #{last_generating}" - $stderr.puts " Likely culprit: doc comment in #{class_name}" - $stderr.puts "" - $stderr.puts " Javadoc can hard-exit (not just warn) on specific scaladoc" - $stderr.puts " patterns once they have been passed through genjavadoc --" - $stderr.puts " wiki-style `[[Class]]` / `[[method]]` links or inline-backticked" - $stderr.puts " code refs in the Scala source for the class above are common" - $stderr.puts " triggers. Start by auditing any recent doc-string changes in" - $stderr.puts " that source file." - $stderr.puts "" - $stderr.puts " NOTE: the '[error]' lines above on files under" - $stderr.puts " target/java/... are benign genjavadoc stubs -- every PR" - $stderr.puts " emits them and they do not cause the exit. Ignore them." 
- elsif javadoc_exit_idx - $stderr.puts "" - $stderr.puts " Javadoc exited but no class HTML generation was in progress;" - $stderr.puts " the crash predates HTML output -- likely a CLI / classpath /" - $stderr.puts " setup issue. See the full sbt output above." - else - $stderr.puts "" - $stderr.puts " Could not locate a 'javadoc exited with exit code' marker in" - $stderr.puts " the log; the failure is likely outside the javaunidoc step" - $stderr.puts " (scaladoc / sbt / build env). See the full sbt output above." + puts bar + puts "" + + # GitHub Actions inline annotations. `%`, `\r`, `\n` require URL-style + # escaping per the workflow command spec; newlines render as multiple + # lines inside the annotation, so the source snippet and caret display + # under the error message in the PR check panel. + project_root = SPARK_PROJECT_ROOT + '/' + fatal_diagnostics.each do |d| + rel = d[:path].start_with?(project_root) ? d[:path][project_root.length..] : d[:path] + full = ([d[:msg]] + d[:context]).join("\n") + enc = full.gsub(/[%\r\n]/, '%' => '%25', "\r" => '%0D', "\n" => '%0A') + puts "::error file=#{rel},line=#{d[:line]},title=javadoc::#{enc}" end - $stderr.puts banner - $stderr.puts "" - rescue => e - # Never let the diagnostic helper itself obscure the underlying unidoc - # failure: if anything here goes wrong (e.g. encoding error reading the - # log), report it briefly and let the caller raise the real error. - $stderr.puts "(diagnostic helper failed: #{e.class}: #{e.message})" end + + if reported_error_count && reported_error_count != fatal_diagnostics.size + puts "::warning::Javadoc reported #{reported_error_count} errors but " \ + "build_api_docs.rb captured #{fatal_diagnostics.size}. The doclint " \ + "phase markers may have shifted; please update build_api_docs.rb." + end + + raise("Unidoc generation failed") unless $?.success? 
end def build_scala_and_java_docs diff --git a/docs/building-spark.md b/docs/building-spark.md index a2e3125be8d1d..f64304dd92fd3 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -27,7 +27,7 @@ license: | ## Apache Maven The Maven-based build is the build of reference for Apache Spark. -Building Spark using Maven requires Maven 3.9.15 and Java 17/21. +Building Spark using Maven requires Maven 3.9.15 and Java 17/21/25. Spark requires Scala 2.13; support for Scala 2.12 was removed in Spark 4.0.0. ### Setting up Maven's Memory Usage diff --git a/docs/declarative-pipelines-programming-guide.md b/docs/declarative-pipelines-programming-guide.md index c5d18a7cb71be..e1c2c078212ae 100644 --- a/docs/declarative-pipelines-programming-guide.md +++ b/docs/declarative-pipelines-programming-guide.md @@ -180,6 +180,33 @@ Your pipelines implemented with the Python API must import this module. It's rec from pyspark import pipelines as dp ``` +### The Spark Session in Python Pipelines + +In Spark 4.1, every pipeline file had to declare `spark = SparkSession.active()` explicitly. Starting in Spark 4.2, the framework injects `spark` into each pipeline file's module namespace, so the explicit assignment is no longer required. + +```python +from pyspark import pipelines as dp + +@dp.materialized_view +def my_view(): + return spark.range(10) +``` + +Pipeline files that still include `spark = SparkSession.active()` continue to work correctly. However, if you do assign the session explicitly, `SparkSession.active()` is the only supported way to do so. For example, `SparkSession.builder.config(...).getOrCreate()` mutates session config, which is blocked in SDP. + +Note that without the explicit assignment, many tools and editors may consider `spark` an undefined name. To address that, you can add `spark: SparkSession` at module scope. SDP will still inject the actual session before the module runs, so this only documents the type for static analysis. 
+ +```python +from pyspark import pipelines as dp +from pyspark.sql import SparkSession + +spark: SparkSession + +@dp.materialized_view +def my_view(): + return spark.range(10) +``` + ### Creating a Materialized View in Python The `@dp.materialized_view` decorator tells SDP to create a materialized view based on the results of a function that performs a batch read: diff --git a/docs/img/AllJobsPage.png b/docs/img/AllJobsPage.png new file mode 100644 index 0000000000000..19e1acc8cecf4 Binary files /dev/null and b/docs/img/AllJobsPage.png differ diff --git a/docs/img/AllJobsPageDetail1.png b/docs/img/AllJobsPageDetail1.png deleted file mode 100644 index de7e8c8883328..0000000000000 Binary files a/docs/img/AllJobsPageDetail1.png and /dev/null differ diff --git a/docs/img/AllJobsPageDetail2.png b/docs/img/AllJobsPageDetail2.png deleted file mode 100644 index b7203b2e66586..0000000000000 Binary files a/docs/img/AllJobsPageDetail2.png and /dev/null differ diff --git a/docs/img/AllJobsPageDetail3.png b/docs/img/AllJobsPageDetail3.png deleted file mode 100644 index 75b7caec119b2..0000000000000 Binary files a/docs/img/AllJobsPageDetail3.png and /dev/null differ diff --git a/docs/img/AllStagesPage.png b/docs/img/AllStagesPage.png new file mode 100644 index 0000000000000..52b2882f60abb Binary files /dev/null and b/docs/img/AllStagesPage.png differ diff --git a/docs/img/AllStagesPageDetail1.png b/docs/img/AllStagesPageDetail1.png deleted file mode 100644 index ac3c48b5a9a16..0000000000000 Binary files a/docs/img/AllStagesPageDetail1.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail2.png b/docs/img/AllStagesPageDetail2.png deleted file mode 100644 index 41d4165b92988..0000000000000 Binary files a/docs/img/AllStagesPageDetail2.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail3.png b/docs/img/AllStagesPageDetail3.png deleted file mode 100644 index fd5267aa4a1c4..0000000000000 Binary files a/docs/img/AllStagesPageDetail3.png and /dev/null differ 
diff --git a/docs/img/AllStagesPageDetail4.png b/docs/img/AllStagesPageDetail4.png deleted file mode 100644 index 2f038b3d6196b..0000000000000 Binary files a/docs/img/AllStagesPageDetail4.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail5.png b/docs/img/AllStagesPageDetail5.png deleted file mode 100644 index 95d1f0e7f3bea..0000000000000 Binary files a/docs/img/AllStagesPageDetail5.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail6.png b/docs/img/AllStagesPageDetail6.png deleted file mode 100644 index 1c4ec1594e031..0000000000000 Binary files a/docs/img/AllStagesPageDetail6.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail7.png b/docs/img/AllStagesPageDetail7.png deleted file mode 100644 index 6ab37481aa157..0000000000000 Binary files a/docs/img/AllStagesPageDetail7.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail8.png b/docs/img/AllStagesPageDetail8.png deleted file mode 100644 index a60745c27b166..0000000000000 Binary files a/docs/img/AllStagesPageDetail8.png and /dev/null differ diff --git a/docs/img/AllStagesPageDetail9.png b/docs/img/AllStagesPageDetail9.png deleted file mode 100644 index c471320cd9bbc..0000000000000 Binary files a/docs/img/AllStagesPageDetail9.png and /dev/null differ diff --git a/docs/img/JobPage.png b/docs/img/JobPage.png new file mode 100644 index 0000000000000..594bdcd30d35e Binary files /dev/null and b/docs/img/JobPage.png differ diff --git a/docs/img/JobPageDetail1.png b/docs/img/JobPageDetail1.png deleted file mode 100644 index 1ee741d1f09d0..0000000000000 Binary files a/docs/img/JobPageDetail1.png and /dev/null differ diff --git a/docs/img/JobPageDetail2.png b/docs/img/JobPageDetail2.png deleted file mode 100644 index 5eb529eb7c275..0000000000000 Binary files a/docs/img/JobPageDetail2.png and /dev/null differ diff --git a/docs/img/JobPageDetail3.png b/docs/img/JobPageDetail3.png deleted file mode 100644 index 9f691e4ed2b6b..0000000000000 Binary files 
a/docs/img/JobPageDetail3.png and /dev/null differ diff --git a/docs/img/StagePage.png b/docs/img/StagePage.png new file mode 100644 index 0000000000000..9cbabd9eacf82 Binary files /dev/null and b/docs/img/StagePage.png differ diff --git a/docs/img/webui-env-class.png b/docs/img/webui-env-class.png deleted file mode 100644 index e57dada528d1e..0000000000000 Binary files a/docs/img/webui-env-class.png and /dev/null differ diff --git a/docs/img/webui-env-hadoop.png b/docs/img/webui-env-hadoop.png deleted file mode 100644 index e4ae232d18217..0000000000000 Binary files a/docs/img/webui-env-hadoop.png and /dev/null differ diff --git a/docs/img/webui-env-sys.png b/docs/img/webui-env-sys.png deleted file mode 100644 index e7d57fe1a84db..0000000000000 Binary files a/docs/img/webui-env-sys.png and /dev/null differ diff --git a/docs/img/webui-env-tab.png b/docs/img/webui-env-tab.png index d9bfc1d4adad0..2ef145ca3bf89 100644 Binary files a/docs/img/webui-env-tab.png and b/docs/img/webui-env-tab.png differ diff --git a/docs/img/webui-exe-err.png b/docs/img/webui-exe-err.png deleted file mode 100644 index 2fb11638faf74..0000000000000 Binary files a/docs/img/webui-exe-err.png and /dev/null differ diff --git a/docs/img/webui-exe-tab.png b/docs/img/webui-exe-tab.png index 8b835fd1f9740..287ba238677db 100644 Binary files a/docs/img/webui-exe-tab.png and b/docs/img/webui-exe-tab.png differ diff --git a/docs/img/webui-exe-thread.png b/docs/img/webui-exe-thread.png deleted file mode 100644 index 136d274159e16..0000000000000 Binary files a/docs/img/webui-exe-thread.png and /dev/null differ diff --git a/docs/img/webui-sql-dag.png b/docs/img/webui-sql-dag.png index 1c83c176da325..e20630bdddf52 100644 Binary files a/docs/img/webui-sql-dag.png and b/docs/img/webui-sql-dag.png differ diff --git a/docs/img/webui-sql-plan.png b/docs/img/webui-sql-plan.png deleted file mode 100644 index f88e0b24a5411..0000000000000 Binary files a/docs/img/webui-sql-plan.png and /dev/null differ diff --git 
a/docs/img/webui-sql-tab.png b/docs/img/webui-sql-tab.png index dca58e7d93a31..1d9660cc3df45 100644 Binary files a/docs/img/webui-sql-tab.png and b/docs/img/webui-sql-tab.png differ diff --git a/docs/img/webui-storage-detail.png b/docs/img/webui-storage-detail.png index 837b235be011c..9cb448e1ca463 100644 Binary files a/docs/img/webui-storage-detail.png and b/docs/img/webui-storage-detail.png differ diff --git a/docs/img/webui-storage-tab.png b/docs/img/webui-storage-tab.png index 3a832981cb93e..1f000bef95e2c 100644 Binary files a/docs/img/webui-storage-tab.png and b/docs/img/webui-storage-tab.png differ diff --git a/docs/index.md b/docs/index.md index cb32ddcde7e2b..6d590172e9380 100644 --- a/docs/index.md +++ b/docs/index.md @@ -34,7 +34,8 @@ source, visit [Building Spark](building-spark.html). Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS), and it should run on any platform that runs a supported version of Java. This should include JVMs on x86_64 and ARM64. It's easy to run locally on one machine --- all you need is to have `java` installed on your system `PATH`, or the `JAVA_HOME` environment variable pointing to a Java installation. -Spark runs on Java 17/21, Scala 2.13, Python 3.10+, and R 3.5+ (Deprecated). +Spark runs on Java 17/21/25, Scala 2.13, Python 3.10+, and R 3.5+ (Deprecated). +Support for Java 25 versions prior to 25.0.3 is deprecated as of Spark 4.2.0. When using the Scala API, it is necessary for applications to use the same version of Scala that Spark was compiled for. Since Spark 4.0.0, it's Scala 2.13.
# Running the Examples and Shell diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 777d8d4228e40..aa753e259bcc7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -708,6 +708,18 @@ See the [configuration page](configuration.html) for information on Spark config 4.1.0 + + spark.kubernetes.allocation.recoveryMode.enabled + (none) + + When Spark driver detects an executor termination due to OOM, Spark starts to + allocate the recovery-mode executors which accept only a single task per executor JVM. + In other words, the recovery-mode executors replace the OOM-terminated executors to + survive from the resource-hungry tasks for the remaining tasks and stages. + If set to false, Spark will not use the recovery-mode executors. + + 4.2.0 + spark.kubernetes.jars.avoidDownloadSchemes (none) @@ -1545,6 +1557,14 @@ See the [configuration page](configuration.html) for information on Spark config 3.2.0 + + spark.kubernetes.driver.annotateExitException + false + + If set to true, Spark will store the exit exception of failed applications in the Kubernetes API server using the spark.exit-exception annotation. + + 4.1.0 + spark.kubernetes.driver.service.ipFamilyPolicy SingleStack @@ -1563,6 +1583,14 @@ See the [configuration page](configuration.html) for information on Spark config 3.4.0 + + spark.kubernetes.executor.useDriverPodIP + false + + If true, executor pods use Driver pod IP directly instead of Driver Service. + + 4.1.0 + spark.kubernetes.driver.ownPersistentVolumeClaim true @@ -1660,6 +1688,17 @@ See the [configuration page](configuration.html) for information on Spark config 3.2.0 + + spark.kubernetes.allocation.maxPendingPodsPerRp + Int.MaxValue + + Maximum number of pending PODs allowed per resource profile ID during executor + allocation. This provides finer-grained control over pending pods by limiting them + per resource profile rather than globally.
When set, this limit is enforced + independently for each resource profile ID. + + 4.1.0 + spark.kubernetes.allocation.pods.allocator direct @@ -1747,6 +1786,67 @@ See the [configuration page](configuration.html) for information on Spark config 3.3.0 + + spark.kubernetes.executor.resizeInterval + 0s + + Interval between executor resize operations. To disable, set 0 (default). + Takes effect only when org.apache.spark.scheduler.cluster.k8s.ExecutorResizePlugin + is registered via spark.plugins. + + 4.2.0 + + + spark.kubernetes.executor.resizeThreshold + 0.9 + + The threshold to resize. + Takes effect only when org.apache.spark.scheduler.cluster.k8s.ExecutorResizePlugin + is registered via spark.plugins. + + 4.2.0 + + + spark.kubernetes.executor.resizeFactor + 0.1 + + The factor to resize. + Takes effect only when org.apache.spark.scheduler.cluster.k8s.ExecutorResizePlugin + is registered via spark.plugins. + + 4.2.0 + + + spark.kubernetes.executor.pvc.resizeInterval + 5min + + Interval between executor PVC resize operations, in minutes. Defaults to 5 minutes. + Set to 0 to disable. Must be 0 or a positive multiple of 5 minutes. + Takes effect only when org.apache.spark.scheduler.cluster.k8s.ExecutorPVCResizePlugin + is registered via spark.plugins. + + 4.2.0 + + + spark.kubernetes.executor.pvc.resizeThreshold + 0.5 + + The PVC usage ratio (used / capacity) above which the driver triggers a resize. + Takes effect only when org.apache.spark.scheduler.cluster.k8s.ExecutorPVCResizePlugin + is registered via spark.plugins. + + 4.2.0 + + + spark.kubernetes.executor.pvc.resizeFactor + 1.0 + + The factor to grow PVC storage by, relative to the current request. + Takes effect only when org.apache.spark.scheduler.cluster.k8s.ExecutorPVCResizePlugin + is registered via spark.plugins. + + 4.2.0 + #### Pod template properties @@ -1953,10 +2053,10 @@ Spark allows users to specify a custom Kubernetes schedulers. 
#### Using Volcano as Customized Scheduler for Spark on Kubernetes ##### Prerequisites -* Spark on Kubernetes with [Volcano](https://volcano.sh/en) as a custom scheduler is supported since Spark v3.3.0 and Volcano v1.7.0. Below is an example to install Volcano 1.14.1: +* Spark on Kubernetes with [Volcano](https://volcano.sh/en) as a custom scheduler is supported since Spark v3.3.0 and Volcano v1.7.0. Below is an example to install Volcano 1.14.2: ```bash - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.1/installer/volcano-development.yaml + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.14.2/installer/volcano-development.yaml ``` ##### Build diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index dca3b03eeb4e7..620e3800ff010 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -27,6 +27,9 @@ license: | - Since Spark 4.2, Spark enables order-independent checksums for shuffle outputs by default to detect data inconsistencies during indeterminate shuffle stage retries. If a checksum mismatch is detected, Spark rolls back and re-executes all succeeding stages that depend on the shuffle output. If rolling back is not possible for some succeeding stages, the job will fail. To restore the previous behavior, set `spark.sql.shuffle.orderIndependentChecksum.enabled` and `spark.sql.shuffle.orderIndependentChecksum.enableFullRetryOnMismatch` to `false`. - Since Spark 4.2, support for Derby JDBC datasource is deprecated. - Since Spark 4.2, a new default method `mergeWith` has been added to the `CustomTaskMetric` interface. The default implementation sums the two metric values, which is correct for count-type metrics. Data source connector implementations that report non-additive metrics (e.g., maximum, average, compression ratio, or gauge values) must override `mergeWith` to provide correct merge semantics. 
+- Since Spark 4.2, the virtual `system` catalog hosts the new `system.builtin` and `system.session` namespaces. `system.builtin` exposes built-in functions and functions injected through `SparkSessionExtensions`; `system.session` exposes temporary views, temporary functions, and session variables created in the current session. As a result, 2-part references like `builtin.func()` and `session.func()` now follow a mini-path that tries the system namespace first and the current catalog second, so a persistent schema named `builtin` or `session` is no longer reached by `builtin.func()` / `session.func()` when the system namespace contains an object of the same name. To restore the previous behavior (current catalog first), set `spark.sql.legacy.persistentCatalogFirst` to `true`. Persistent schemas with these names are still allowed but should be reached with an explicit catalog prefix (for example, `spark_catalog.session.x`). See [Reserved system names](sql-ref-identifier.html#reserved-system-names). +- Since Spark 4.2, `CREATE TEMPORARY VIEW`, `CREATE TEMPORARY FUNCTION`, and the corresponding `DROP` statements accept the `session` and `system.session` qualifiers on the object name (in addition to the previously supported unqualified form); for example, `CREATE TEMPORARY VIEW system.session.v AS ...` and `DROP TEMPORARY FUNCTION session.f` are now valid. Any other qualifier on a temporary object is rejected with `INVALID_TEMP_OBJ_QUALIFIER`. +- Since Spark 4.2, the SQL standard `PATH` feature is available: the `SET PATH` statement, the `current_path()` function, path-based resolution of unqualified routines, tables, views, and session variables, and the configurations `spark.sql.path.enabled` (default `false`) and `spark.sql.defaultPath`. The feature is opt-in; when `spark.sql.path.enabled` is `false`, unqualified resolution falls back to a fixed default path and `SET PATH` is rejected with `UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED`. 
See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) and [Name Resolution](sql-ref-name-resolution.html). ## Upgrading from Spark SQL 4.0 to 4.1 diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 8621eca79a6c8..4f21b7b4b3c79 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -420,6 +420,7 @@ Below is a list of all the keywords in Spark SQL. |ANTI|non-reserved|strict-non-reserved|non-reserved| |ANY|reserved|non-reserved|reserved| |ANY_VALUE|non-reserved|non-reserved|non-reserved| +|APPROX|non-reserved|non-reserved|non-reserved| |ARCHIVE|non-reserved|non-reserved|non-reserved| |ARRAY|non-reserved|non-reserved|reserved| |AS|reserved|non-reserved|reserved| @@ -429,6 +430,7 @@ Below is a list of all the keywords in Spark SQL. |ATOMIC|non-reserved|non-reserved|non-reserved| |AUTHORIZATION|reserved|non-reserved|reserved| |BEGIN|non-reserved|non-reserved|non-reserved| +|BERNOULLI|non-reserved|non-reserved|non-reserved| |BETWEEN|non-reserved|non-reserved|reserved| |BIGINT|non-reserved|non-reserved|reserved| |BINARY|non-reserved|non-reserved|reserved| @@ -515,6 +517,7 @@ Below is a list of all the keywords in Spark SQL. |DFS|non-reserved|non-reserved|non-reserved| |DIRECTORIES|non-reserved|non-reserved|non-reserved| |DIRECTORY|non-reserved|non-reserved|non-reserved| +|DISTANCE|non-reserved|non-reserved|non-reserved| |DISTINCT|reserved|non-reserved|reserved| |DISTRIBUTE|non-reserved|non-reserved|non-reserved| |DIV|non-reserved|non-reserved|not a keyword| @@ -528,6 +531,7 @@ Below is a list of all the keywords in Spark SQL. 
|ESCAPE|reserved|non-reserved|reserved| |ESCAPED|non-reserved|non-reserved|non-reserved| |EVOLUTION|non-reserved|non-reserved|non-reserved| +|EXACT|non-reserved|non-reserved|non-reserved| |EXCEPT|reserved|strict-non-reserved|reserved| |EXCHANGE|non-reserved|non-reserved|non-reserved| |EXCLUDE|non-reserved|non-reserved|non-reserved| @@ -648,6 +652,7 @@ Below is a list of all the keywords in Spark SQL. |NANOSECOND|non-reserved|non-reserved|non-reserved| |NANOSECONDS|non-reserved|non-reserved|non-reserved| |NATURAL|reserved|strict-non-reserved|reserved| +|NEAREST|non-reserved|non-reserved|non-reserved| |NEXT|non-reserved|non-reserved|non-reserved| |NO|non-reserved|non-reserved|reserved| |NONE|non-reserved|non-reserved|reserved| @@ -738,6 +743,7 @@ Below is a list of all the keywords in Spark SQL. |SETS|non-reserved|non-reserved|non-reserved| |SHORT|non-reserved|non-reserved|non-reserved| |SHOW|non-reserved|non-reserved|non-reserved| +|SIMILARITY|non-reserved|non-reserved|non-reserved| |SINGLE|non-reserved|non-reserved|non-reserved| |SKEWED|non-reserved|non-reserved|non-reserved| |SMALLINT|non-reserved|non-reserved|reserved| @@ -760,6 +766,7 @@ Below is a list of all the keywords in Spark SQL. |SUBSTR|non-reserved|non-reserved|non-reserved| |SUBSTRING|non-reserved|non-reserved|non-reserved| |SYNC|non-reserved|non-reserved|non-reserved| +|SYSTEM|non-reserved|non-reserved|reserved| |SYSTEM_PATH|non-reserved|non-reserved|not a keyword| |SYSTEM_TIME|non-reserved|non-reserved|non-reserved| |SYSTEM_VERSION|non-reserved|non-reserved|non-reserved| diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index 743ad4e3abb22..0ae05d8f46bef 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -95,8 +95,8 @@ Spark SQL and DataFrames support the following data types: * Spatial types Spatial objects as defined in the [OGC Simple Feature Access](https://portal.ogc.org/files/?artifact_id=25355) specification. 
- - `GeometryType`: Represents GEOMETRY values—spatial objects in a Cartesian coordinate system. The type can be fixed to a single SRID, e.g. `geometry(4326)`, or allow mixed SRIDs with `geometry(any)`. Default SRID when not specified is 4326 (WGS 84). - - `GeographyType`: Represents GEOGRAPHY values—spatial objects in a geographic coordinate system (latitude/longitude). Edge interpolation is always SPHERICAL. The type can be fixed to a single SRID, e.g. `geography(4326)`, or allow mixed SRIDs with `geography(any)`. Default SRID is 4326 (WGS 84). + - `GeometryType`: Represents GEOMETRY values, spatial objects in a Cartesian coordinate system. The type can be fixed to a single SRID, e.g. `geometry(4326)`, or allow mixed SRIDs with `geometry(any)`. In SQL, `GEOMETRY` columns must always be declared with an explicit SRID or `ANY`. + - `GeographyType`: Represents GEOGRAPHY values, spatial objects in a geographic coordinate system (latitude/longitude). Edge interpolation is always SPHERICAL. The type can be fixed to a single geographic SRID, e.g. `geography(4326)`, or allow mixed SRIDs with `geography(any)`. In SQL, `GEOGRAPHY` columns must always be declared with an explicit SRID or `ANY`. For more details and built-in functions, see [Geospatial (Geometry/Geography) types](sql-ref-geospatial-types.html). * Complex types @@ -143,8 +143,8 @@ from pyspark.sql.types import * |**TimestampNTZType**|datetime.datetime|TimestampNTZType()| |**DateType**|datetime.date|DateType()| |**DayTimeIntervalType**|datetime.timedelta|DayTimeIntervalType()| -|**GeometryType**|Geometry|GeometryType() or GeometryType(*srid*)| -|**GeographyType**|Geography|GeographyType() or GeographyType(*srid*)| +|**GeometryType**|Geometry|GeometryType(*srid*)
    **Note:** *srid* is required and may be an `int` or the string `"ANY"`.| +|**GeographyType**|Geography|GeographyType(*srid*)
    **Note:** *srid* is required and may be an `int` or the string `"ANY"`.| |**ArrayType**|list, tuple, or array|ArrayType(*elementType*, [*containsNull*])
    **Note:**The default value of *containsNull* is True.| |**MapType**|dict|MapType(*keyType*, *valueType*, [*valueContainsNull]*)
    **Note:**The default value of *valueContainsNull* is True.| |**StructType**|list or tuple|StructType(*fields*)
    **Note:** *fields* is a Seq of StructFields. Also, two fields with the same name are not allowed.| @@ -179,8 +179,8 @@ You can access them by doing |**TimeType**|java.time.LocalTime|TimeType| |**YearMonthIntervalType**|java.time.Period|YearMonthIntervalType| |**DayTimeIntervalType**|java.time.Duration|DayTimeIntervalType| -|**GeometryType**|org.apache.spark.sql.types.Geometry|GeometryType or GeometryType(*srid*)| -|**GeographyType**|org.apache.spark.sql.types.Geography|GeographyType or GeographyType(*srid*)| +|**GeometryType**|org.apache.spark.sql.types.Geometry|GeometryType(*srid*)| +|**GeographyType**|org.apache.spark.sql.types.Geography|GeographyType(*srid*)| |**ArrayType**|scala.collection.Seq|ArrayType(*elementType*, [*containsNull]*)
    **Note:** The default value of *containsNull* is true.| |**MapType**|scala.collection.Map|MapType(*keyType*, *valueType*, [*valueContainsNull]*)
    **Note:** The default value of *valueContainsNull* is true.| |**StructType**|org.apache.spark.sql.Row|StructType(*fields*)
    **Note:** *fields* is a Seq of StructFields. Also, two fields with the same name are not allowed.| @@ -272,8 +272,8 @@ The following table shows the type names as well as aliases used in Spark SQL pa |**DecimalType**|DECIMAL, DEC, NUMERIC| |**YearMonthIntervalType**|INTERVAL YEAR, INTERVAL YEAR TO MONTH, INTERVAL MONTH| |**DayTimeIntervalType**|INTERVAL DAY, INTERVAL DAY TO HOUR, INTERVAL DAY TO MINUTE, INTERVAL DAY TO SECOND, INTERVAL HOUR, INTERVAL HOUR TO MINUTE, INTERVAL HOUR TO SECOND, INTERVAL MINUTE, INTERVAL MINUTE TO SECOND, INTERVAL SECOND| -|**GeometryType**|GEOMETRY or GEOMETRY(*srid*) or GEOMETRY(ANY)| -|**GeographyType**|GEOGRAPHY or GEOGRAPHY(*srid*) or GEOGRAPHY(ANY)| +|**GeometryType**|GEOMETRY(*srid*) or GEOMETRY(ANY)| +|**GeographyType**|GEOGRAPHY(*srid*) or GEOGRAPHY(ANY)| |**ArrayType**|ARRAY\| |**StructType**|STRUCT
    **Note:** ':' is optional.| |**MapType**|MAP| diff --git a/docs/sql-ref-function-current-path.md b/docs/sql-ref-function-current-path.md new file mode 100644 index 0000000000000..afe0d4f6ba54d --- /dev/null +++ b/docs/sql-ref-function-current-path.md @@ -0,0 +1,85 @@ +--- +layout: global +title: current_path function +displayTitle: current_path function +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +Returns the effective SQL Path for the current session as a comma-separated string of +qualified namespace names. See [`SET PATH`](sql-ref-syntax-aux-conf-mgmt-set-path.html) for a +description of what the path is, how to enable it, and how to change it, and +[Name Resolution](sql-ref-name-resolution.html) for how the path drives unqualified name +resolution. + +### Syntax + +```sql +current_path() +``` + +### Arguments + +This function takes no arguments. The parentheses may be omitted. + +### Returns + +A non-nullable `STRING`. Each path entry is written as a dotted name with backticks added only +where required by Spark's identifier rules. Entries are separated by a single comma. 
+ +When the path contains the virtual `CURRENT_SCHEMA` marker, the marker is materialized as the +catalog-qualified current schema (`current_catalog.current_schema`) each time +`current_path()` is evaluated, so subsequent `USE SCHEMA` statements are reflected without +re-issuing `SET PATH`. + +### Examples + +```sql +> SELECT current_path(); + system.builtin,system.session,spark_catalog.default + +-- ANSI no-parens form returns the same value. +> SELECT CURRENT_PATH; + system.builtin,system.session,spark_catalog.default + +-- The output reflects the latest SET PATH. +> SET PATH = spark_catalog.default, system.builtin; +> SELECT current_path(); + spark_catalog.default,system.builtin + +-- CURRENT_SCHEMA on the path is re-evaluated on every call. +> SET PATH = CURRENT_SCHEMA, system.builtin; +> USE spark_catalog.finance; +> SELECT current_path(); + spark_catalog.finance,system.builtin +> USE spark_catalog.default; +> SELECT current_path(); + spark_catalog.default,system.builtin + +-- Inside a persistent view or SQL function body, current_path() returns the invoker's path, +-- not the frozen path captured at creation time. +> SET PATH = spark_catalog.default, system.builtin; +> CREATE VIEW v_path AS SELECT current_path() AS p; +> SET PATH = spark_catalog.other, system.builtin; +> SELECT * FROM v_path; + spark_catalog.other,system.builtin +``` + +### Related Statements + +* [Name Resolution](sql-ref-name-resolution.html) +* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) +* [Built-in Functions](sql-ref-functions-builtin.html) diff --git a/docs/sql-ref-functions-builtin.md b/docs/sql-ref-functions-builtin.md index b6572609a34b8..22e52d0500c53 100644 --- a/docs/sql-ref-functions-builtin.md +++ b/docs/sql-ref-functions-builtin.md @@ -17,6 +17,10 @@ license: | limitations under the License. --- +All built-in functions live in the virtual schema `system.builtin`. 
They can always be referenced +unambiguously by their fully qualified name (for example `system.builtin.abs`), regardless of any +user-defined function that may share the same name. + ### Aggregate Functions {% include_api_gen generated-agg-funcs-table.html %} #### Examples @@ -126,3 +130,8 @@ license: | {% include_api_gen generated-variant-funcs-table.html %} #### Examples {% include_api_gen generated-variant-funcs-examples.html %} + +### Geospatial ST Functions +{% include_api_gen generated-st-funcs-table.html %} +#### Examples +{% include_api_gen generated-st-funcs-examples.html %} diff --git a/docs/sql-ref-geospatial-types.md b/docs/sql-ref-geospatial-types.md index d0fb1c6ed9465..d5a9d0fece84b 100644 --- a/docs/sql-ref-geospatial-types.md +++ b/docs/sql-ref-geospatial-types.md @@ -25,8 +25,13 @@ Spark SQL supports **GEOMETRY** and **GEOGRAPHY** types for spatial data, as def | Type | Coordinate system | Typical use and notes | |------|-------------------|------------------------| -| **GEOMETRY** | Cartesian (planar) | Projected or local coordinates; planar calculations. Represents points, lines, polygons in a flat coordinate system. Suitable for Web Mercator (SRID 3857), UTM, or local grids (e.g. engineering/CAD). Default SRID in Spark is 4326. | -| **GEOGRAPHY** | Geographic (latitude/longitude) | Earth-based data; distances and areas on the sphere/ellipsoid. Coordinates in longitude and latitude (degrees). Edge interpolation is always **SPHERICAL**. Default SRID is 4326 (WGS 84). | +| **GEOMETRY** | Cartesian (planar) | Projected or local coordinates; planar calculations. Represents points, lines, polygons in a flat coordinate system. Suitable for Web Mercator (SRID 3857), UTM, or local grids (e.g. engineering/CAD). Accepts any SRID in the registry, including SRID 0 (unspecified CRS). | +| **GEOGRAPHY** | Geographic (latitude/longitude) | Earth-based data; distances and areas on the sphere/ellipsoid. Coordinates in longitude and latitude (degrees). 
Edge interpolation is always **SPHERICAL**. Only geographic SRIDs are accepted; the most common is 4326 (WGS 84). | + +In SQL, `GEOMETRY` and `GEOGRAPHY` columns must always be declared with an explicit SRID +(or `ANY`); see [Type Syntax in SQL](#type-syntax-in-sql) below. When a value is constructed +via `ST_GeomFromWKB(wkb)` without an explicit SRID, the value's SRID is `0` (unspecified), +while `ST_GeogFromWKB(wkb)` always returns a value with SRID 4326. #### When to use GEOMETRY vs GEOGRAPHY @@ -113,16 +118,18 @@ When parsing WKB, Spark applies the following rules. Violations result in a pars ### Built-in Geospatial (ST) Functions -Spark SQL provides scalar functions for working with GEOMETRY and GEOGRAPHY values. They are grouped under **st_funcs** in the [Built-in Functions](sql-ref-functions-builtin.html) API. +Spark SQL provides scalar functions for working with GEOMETRY and GEOGRAPHY values. The full list, +with detailed argument descriptions and examples, is on the +[Built-in Functions](sql-ref-functions-builtin.html#geospatial-st-functions) page under +**Geospatial ST Functions**. The functions provided in the current release are summarized here: | Function | Description | |----------|-------------| -| `ST_AsBinary(geo)` | Returns the GEOMETRY or GEOGRAPHY value as WKB (BINARY). | -| `ST_GeomFromWKB(wkb)` | Parses WKB and returns a GEOMETRY with default SRID 0. | -| `ST_GeomFromWKB(wkb, srid)` | Parses WKB and returns a GEOMETRY with the given SRID. | +| `ST_AsBinary(geo[, endianness])` | Returns the GEOMETRY or GEOGRAPHY value as WKB (BINARY). The optional `endianness` argument is `'NDR'` for little-endian (default) or `'XDR'` for big-endian. | +| `ST_GeomFromWKB(wkb[, srid])` | Parses WKB and returns a GEOMETRY. The optional `srid` argument sets the SRID; if omitted, the SRID is `0`. | | `ST_GeogFromWKB(wkb)` | Parses WKB and returns a GEOGRAPHY with SRID 4326. 
| | `ST_Srid(geo)` | Returns the SRID of the GEOMETRY or GEOGRAPHY value (NULL if input is NULL). | -| `ST_SetSrid(geo, srid)` | Returns a new GEOMETRY or GEOGRAPHY with the given SRID. | +| `ST_SetSrid(geo, srid)` | Returns a new GEOMETRY or GEOGRAPHY with the given SRID. The new SRID must be valid for the value's type. | **Examples:** @@ -130,6 +137,9 @@ Spark SQL provides scalar functions for working with GEOMETRY and GEOGRAPHY valu SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040'))); -- 0101000000000000000000F03F0000000000000040 +SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040'), 'XDR')); +-- 00000000013FF00000000000004000000000000000 + SELECT ST_Srid(ST_GeogFromWKB(X'0101000000000000000000F03F0000000000000040')); -- 4326 @@ -139,9 +149,68 @@ SELECT ST_Srid(ST_SetSrid(ST_GeomFromWKB(X'0101000000000000000000F03F00000000000 ### SRID and Stored Values -* **Fixed-SRID columns**: Every value in the column must have the same SRID as the column type. Inserting a value with a different SRID can raise an error (or you can use `ST_SetSrid` to set the value’s SRID to match the column). -* **Mixed-SRID columns** (`GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`): Values can have different SRIDs. Only valid SRIDs are allowed. -* **Storage**: Parquet, Delta, and Iceberg store geometry/geography with a fixed SRID per column; mixed-SRID types are for in-memory/query use. When writing to these formats, a concrete (fixed) SRID is required. +* **Fixed-SRID columns**: Every value in the column must have the same SRID as the column type. Inserting a value with a different SRID raises a `GEO_ENCODER_SRID_MISMATCH_ERROR`. Use `ST_SetSrid` to change a value's SRID to match the column. +* **Mixed-SRID columns** (`GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`): Values can have different SRIDs per row. Each value must still have a valid SRID for the type; an invalid SRID raises `ST_INVALID_SRID_VALUE`. 
+* **Storage**: Parquet, Delta, and Iceberg store geometry/geography with a fixed SRID per column. They do not support persisting `GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`; mixed-SRID types exist for in-memory/query use only. + +### Supported SRIDs + +Spark includes a pre-built SRID registry that combines coordinate systems from the PROJ database with OGC standard overrides. This registry enables validation and proper handling of coordinate systems for geospatial data. + +**SRID Compatibility Rules:** +- **GEOMETRY** accepts all SRIDs in the registry (geographic + projected + SRID 0) +- **GEOGRAPHY** only accepts geographic SRIDs (latitude/longitude coordinate systems) + +#### PROJ Version by Spark Release + +| Spark Version | PROJ Version | +|---------------|--------------| +| 4.2.0 | 9.8.1 | + +The SRID registry is pinned to the PROJ version shown above and is not synced live with external databases. + +#### OGC Standard Overrides + +Spark applies the following OGC standard overrides to specific SRIDs from the PROJ database: + +| SRID | PROJ CRS Identifier | OGC CRS Identifier | Description | +|------|---------------------|-------------------|-------------| +| 4326 | `EPSG:4326` | `OGC:CRS84` | WGS 84 (longitude/latitude order per OGC standard) | +| 4267 | `EPSG:4267` | `OGC:CRS27` | NAD27 | +| 4269 | `EPSG:4269` | `OGC:CRS83` | NAD83 | + + +#### Commonly Used SRIDs + +| SRID | CRS Identifier | Name | CRS Type | Description | +|------|----------------|------|----------|-------------| +| 0 | `SRID:0` | Unspecified | Cartesian | Coordinates with no defined CRS (default for `ST_GeomFromWKB(wkb)`) | +| 4326 | `OGC:CRS84` | WGS 84 | Geographic | World Geodetic System 1984 (longitude/latitude), GPS coordinates, global data (default for GEOGRAPHY) | +| 4267 | `OGC:CRS27` | NAD27 | Geographic | North American Datum 1927 | +| 4269 | `OGC:CRS83` | NAD83 | Geographic | North American Datum 1983 | +| 3857 | `EPSG:3857` | Web Mercator | Projected | Pseudo-Mercator projection used 
by web mapping services | + +**Notes:** +* `GEOMETRY(0)` means a fixed SRID of 0. For mixed per-row SRIDs, use `GEOMETRY(ANY)`. +* [Parquet](https://github.com/apache/parquet-format/blob/master/Geospatial.md) + and [Iceberg](https://github.com/apache/iceberg/blob/main/format/spec.md) geospatial + specifications require a fixed SRID per column, so they do not support persisting + `GEOMETRY(ANY)` or `GEOGRAPHY(ANY)`. + +#### SRID Validation + +**Invalid SRID (not in registry):** +```sql +SELECT ST_GeomFromWKB(X'0101000000000000000000F03F0000000000000040', 99999); +-- Throws [ST_INVALID_SRID_VALUE] +``` + +**Projected SRID with GEOGRAPHY type:** +```sql +CREATE TABLE invalid_geo (id BIGINT, loc GEOGRAPHY(3857)); +-- Throws [ST_INVALID_SRID_VALUE] (3857 is projected, not geographic) +``` + ### Data Types Reference diff --git a/docs/sql-ref-identifier.md b/docs/sql-ref-identifier.md index 7aca08ea9fd8d..b4a990c4111b3 100644 --- a/docs/sql-ref-identifier.md +++ b/docs/sql-ref-identifier.md @@ -52,6 +52,30 @@ An identifier is a string used to identify a database object such as a table, vi Any character from the character set. Use ` to escape special characters (e.g., `). +### Reserved system names + +`system`, `session`, and `builtin` have special meaning and should not be used as user-defined +catalog or schema names. + +| Name | Position | Notes | +| :--- | :------- | :---- | +| `system` | catalog | Virtual catalog hosting `system.builtin` and `system.session`. Spark does not load `system` through the v2 catalog API; setting `spark.sql.catalog.system = ...` is unsupported and produces undefined results. The current catalog cannot be `system`. | +| `builtin` | schema | A persistent schema named `builtin` is allowed but discouraged because it collides with `system.builtin`. | +| `session` | schema | A persistent schema named `session` is allowed but discouraged because it collides with `system.session`. 
| + +A partially qualified 2-part reference like `builtin.x` or `session.x` walks a two-step **mini-path** to +choose the implicit catalog: by default it resolves to `system.builtin.x` / `system.session.x` +if such an object exists, and otherwise falls back to the same name in the current catalog. So +an object in a persistent `builtin` or `session` schema is shadowed only when an object of the +same name exists in the corresponding system namespace. The shadowed object stays reachable via its fully qualified 3-part name (for example +`spark_catalog.session.x`). Set `spark.sql.legacy.persistentCatalogFirst` to `true` to reverse +the preference: the current catalog is tried first and the system namespace becomes the fallback. + +The `system.builtin` and `system.session` namespaces are described in +[SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). Temporary objects in `system.session` are +documented under [CREATE VIEW](sql-ref-syntax-ddl-create-view.html) and +[CREATE FUNCTION (SQL)](sql-ref-syntax-ddl-create-sql-function.html). + ### Examples ```sql diff --git a/docs/sql-ref-name-resolution.md b/docs/sql-ref-name-resolution.md index 2532f05e164b3..3d574e58a9ad2 100644 --- a/docs/sql-ref-name-resolution.md +++ b/docs/sql-ref-name-resolution.md @@ -19,7 +19,7 @@ license: | limitations under the License. --- -Name resolution is the process by which [identifiers](sql-ref-identifier.html) are resolved to specific column-, field-, parameter-, or table-references. +Name resolution is the process by which [identifiers](sql-ref-identifier.html) are resolved to specific column-, field-, parameter-, table-, function-, or variable-references. ## Column, field, parameter, and variable resolution @@ -50,7 +50,7 @@ In detail, resolution of identifiers to a specific reference follows these rules 1. **Parameterless function reference** - If the identifier is unqualified and matches `current_user`, `current_date`, or `current_timestamp`: Resolve it as one of these functions.
+ If the identifier is unqualified and matches `current_user`, `current_date`, `current_time`, `current_timestamp`, or `current_path`: Resolve it as one of these functions. 1. **Column DEFAULT specification** @@ -137,7 +137,10 @@ In detail, resolution of identifiers to a specific reference follows these rules 1. **Session Variables** - 1. Match the identifier to a variable name. If the identifier is qualified, the qualifier must be `session` or `system.session`. + 1. Match the identifier to a session variable name. + If the identifier is qualified, the qualifier must be `session` or `system.session`. + If the identifier is unqualified, `system.session` must be present on the + [SQL Path](sql-ref-syntax-aux-conf-mgmt-set-path.html) (the default path includes it). 1. If the identifier is qualified, match to a field or map key of a variable following rule 1.c ### Limitations @@ -256,37 +259,54 @@ This restriction also applies to parameter references in SQL functions. frm.a lat.b func.c ``` -## Table and view resolution - -An identifier in table-reference can be any one of the following: +## Object name resolution -- Persistent table or view -- Common table expression (CTE) -- [Temporary view](sql-ref-syntax-ddl-create-view.html) +Tables, views, and functions follow the same resolution rule. It depends on how many parts the +identifier has. -Resolution of an identifier depends on whether it is qualified: +### Fully qualified (3 parts) — `catalog.schema.object` -- **Qualified** +The reference is unique and is looked up in `catalog.schema`. `system.builtin.object` identifies +a built-in function; `system.session.object` identifies a temporary view, function, or session +variable. - If the identifier is fully qualified with three parts: `catalog.schema.relation`, it is unique. 
+### Partially qualified (2 parts) — `schema.object` - If the identifier consists of two parts: `schema.relation`, it is further qualified with the result of `SELECT current_catalog()` to make it unique. +The identifier is qualified with `current_catalog` — producing +`current_catalog.schema.object` — unless the leading part is `session` (or `builtin`, for +functions). In that case Spark uses the +[mini-path](sql-ref-identifier.html#reserved-system-names) to choose the implicit catalog, +returning the first match: -- **Unqualified** +| `spark.sql.legacy.persistentCatalogFirst` | Mini-path tried in order | +| :-------------------------------------- | :----------------------- | +| `false` (default) | the system namespace (`system.session.x` / `system.builtin.x`), then the current catalog's `session.x` / `builtin.x` | +| `true` (legacy) | the current catalog's `session.x` / `builtin.x`, then the system namespace (`system.session.x` / `system.builtin.x`) | - 1. **Common table expression** +### Unqualified (1 part) — `object` - If the reference is within the scope of a `WITH` clause, match the identifier to a CTE starting with the immediately containing `WITH` clause and moving outwards from there. +In queries and DML, Spark walks the [SQL Path](sql-ref-syntax-aux-conf-mgmt-set-path.html) and +returns the first match. In DDL, the identifier is qualified with `current_catalog.current_schema`. - 1. **Temporary view** +> Note: persistent views and SQL UDFs capture the SQL Path at `CREATE` time. When the view or +> function is invoked, its body resolves names — tables, views, and functions — +> against that frozen path, not the invoker's current path. `current_schema()` and +> `current_path()` inside the body still return the invoker's context. See +> [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). - Match the identifier to any temporary view defined within the current session. +## Table and view resolution - 1. 
**Persisted table** +A table reference can be a persistent table or view, a temporary view, or a common table +expression (CTE). - Fully qualify the identifier by pre-pending the result of `SELECT current_catalog()` and `SELECT current_schema()` and look it up as a persistent relation. +Resolution follows [Object name resolution](#object-name-resolution), with one addition for +unqualified references: when the reference is inside a `WITH` clause, Spark first matches the +identifier against CTEs from the innermost `WITH` outward. If no CTE matches, Spark walks the +SQL Path. -If the relation cannot be resolved to any table, view, or CTE, Databricks raises a TABLE_OR_VIEW_NOT_FOUND error. +If the relation cannot be resolved, Spark raises `TABLE_OR_VIEW_NOT_FOUND`. The error includes +the effective search path, for example +`searchPath = [system.builtin, system.session, spark_catalog.default]`. ### Examples @@ -317,7 +337,13 @@ If the relation cannot be resolved to any table, view, or CTE, Databricks raises > SELECT c1 FROM rel; 2 --- Temporary views cannot be qualified, so qualifiecation resolved to the table: +-- A temporary view can be qualified with `session` or `system.session`: +> SELECT c1 FROM session.rel; + 2 +> SELECT c1 FROM system.session.rel; + 2 + +-- Other 2-part qualifications resolve to the persisted table: > SELECT c1 FROM default.rel; 1 @@ -343,45 +369,34 @@ If the relation cannot be resolved to any table, view, or CTE, Databricks raises SELECT 1), cte; [TABLE_OR_VIEW_NOT_FOUND] The table or view `cte` cannot be found. -``` - -## Function resolution - -A function reference is recognized by the mandatory trailing set of parentheses. - -It can resolve to: - -- A builtin function provided by Spark, -- A temporary user defined function scoped to the current session, or -- A persistent user defined function. 
-Resolution of a function name depends on whether it is qualified: +-- PATH drives unqualified relation lookup order +> CREATE SCHEMA db_a; +> CREATE SCHEMA db_b; +> CREATE TABLE db_a.t USING parquet AS SELECT 1 AS v; +> CREATE TABLE db_b.t USING parquet AS SELECT 2 AS v; -- **Qualified** - - If the name is fully qualified with three parts: `catalog.schema.function`, it is unique. - - If the name consists of two parts: `schema.function`, it is further qualified with the result of `SELECT current_catalog()` to make it unique. - - The function is then looked up in the catalog. - -- **Unqualified** - - For unqualified function names Spark follows a fixed order of precedence (`PATH`): - - 1. **Builtin function** - - If a function by this name exists among the set of built-in functions, that function is chosen. +> SET PATH = spark_catalog.db_a, spark_catalog.db_b, system.builtin; +> SELECT v FROM t; + 1 - 1. **Temporary function** +> SET PATH = spark_catalog.db_b, spark_catalog.db_a, system.builtin; +> SELECT v FROM t; + 2 - If a function by this name exists among the set of temporary functions, that function is chosen. +-- Three-part `system.session.x` references the temporary scope only: +> SELECT * FROM system.session.no_such_view; + [TABLE_OR_VIEW_NOT_FOUND] ... `system`.`session`.`no_such_view` ... +``` - 1. **Persisted function** +## Function resolution - Fully qualify the function name by pre-pending the result of `SELECT current_catalog()` and `SELECT current_schema()` and look it up as a persistent function. +A function reference is recognized by the trailing parentheses, and follows +[Object name resolution](#object-name-resolution). -If the function cannot be resolved Spark raises an `UNRESOLVED_ROUTINE` error. +If the function cannot be resolved, Spark raises `UNRESOLVED_ROUTINE`. The error includes the +effective search path, for example +`searchPath = [system.builtin, system.session, spark_catalog.default]`. 
### Examples @@ -420,4 +435,45 @@ If the function cannot be resolved Spark raises an `UNRESOLVED_ROUTINE` error. -- To resolve the persistent function it now needs qualification > SELECT spark_catalog.default.func(4, 3); 6 + +-- A built-in can always be reached by qualification, even when shadowed. +-- Put system.session ahead of system.builtin so a matching temp `abs` shadows the built-in. +> SET PATH = system.session, system.builtin, spark_catalog.default; +> CREATE TEMPORARY FUNCTION abs(x INT) RETURNS INT RETURN x + 100; + +-- Unqualified abs(-5) resolves to the temp (-5 + 100 = 95). +> SELECT abs(-5); + 95 + +-- system.builtin.abs and builtin.abs reach the built-in around the shadow. +> SELECT system.builtin.abs(-5); + 5 +> SELECT builtin.abs(-5); + 5 + +-- session.abs reaches the temp explicitly. +> SELECT session.abs(-5); + 95 + +> DROP TEMPORARY FUNCTION abs; +> SET PATH = DEFAULT_PATH; + +-- PATH controls unqualified routine lookup order +> CREATE SCHEMA path_a; +> CREATE SCHEMA path_b; +> CREATE FUNCTION path_a.pick() RETURNS INT RETURN 10; +> CREATE FUNCTION path_b.pick() RETURNS INT RETURN 20; + +> SET PATH = spark_catalog.path_a, spark_catalog.path_b, system.builtin; +> SELECT pick(); + 10 + +> SET PATH = spark_catalog.path_b, spark_catalog.path_a, system.builtin; +> SELECT pick(); + 20 + +-- Unresolved routine lists the effective search path +> SET PATH = spark_catalog.default, system.builtin; +> SELECT does_not_exist(); + [UNRESOLVED_ROUTINE] ... searchPath: [`spark_catalog`.`default`, `system`.`builtin`] ... ``` diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-set-path.md b/docs/sql-ref-syntax-aux-conf-mgmt-set-path.md new file mode 100644 index 0000000000000..64e698fa193ae --- /dev/null +++ b/docs/sql-ref-syntax-aux-conf-mgmt-set-path.md @@ -0,0 +1,248 @@ +--- +layout: global +title: SET PATH +displayTitle: SET PATH +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +### Description + +`SET PATH` changes the **SQL Path** of the current session. + +The SQL Path is an ordered list of catalog-qualified schema names that Spark walks when +resolving unqualified references to functions, tables, views, and session variables in queries +and DML (`SELECT`, `INSERT`, `UPDATE`, `DELETE`, `MERGE`). The first match wins. DDL +(`CREATE TABLE`, `CREATE VIEW`, `CREATE FUNCTION`, `DROP`, `ALTER`, ...) resolves unqualified +object names against `current_catalog.current_schema`, not the path; so `CREATE TABLE t` always +creates `t` in the current schema regardless of the path. + +The path can include two virtual namespaces in the `system` catalog: + +- `system.builtin` — built-in functions, including those injected by + `SparkSessionExtensions`. +- `system.session` — temporary views, temporary functions, and session variables in the + current session. + +`SET PATH` is controlled by `spark.sql.path.enabled`. When it is `false` (the default), +`SET PATH` raises `UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED`. Unqualified resolution and +[`current_path()`](sql-ref-function-current-path.html) still use the default path. + +The initial value of `PATH` in a session is `DEFAULT_PATH`. 
`DEFAULT_PATH` is either the value of +`spark.sql.defaultPath`, or, when that configuration is empty, a built-in value composed of +`system.builtin`, `system.session`, and the current schema. To override, set +`spark.sql.defaultPath`. See the [`DEFAULT_PATH` parameter](#parameters) for the exact derivation +rules. + +The effect of `SET PATH` is scoped to the current session and is lost when the session ends. To +re-apply the current default path mid-session, run `SET PATH = DEFAULT_PATH`. (This stores a +snapshot of `DEFAULT_PATH` at the moment of the statement; later changes to +`spark.sql.defaultPath` are not picked up automatically.) Cloned sessions inherit the parent's +path at clone time; later changes in the child do not propagate back. + +Persistent views and SQL UDFs capture the path at `CREATE` time into the object's metadata. +Each invocation resolves the body against that frozen path, not the invoker's current path; +`current_schema()` and `current_path()` inside the body still return the invoker's context. + +The leading names `session` and `builtin` have special meaning in 2-part references; see +[Reserved system names](sql-ref-identifier.html#reserved-system-names). + +### Syntax + +```sql +SET PATH = path_element [ , ... ] + +path_element + { DEFAULT_PATH | + SYSTEM_PATH | + PATH | + CURRENT_SCHEMA | + CURRENT_DATABASE | + catalog_name . namespace [ . namespace ... ] } +``` + +### Parameters + +* **`DEFAULT_PATH`** + + Expands to the session's default path. The default path has two layers: + + 1. If `spark.sql.defaultPath` is set to a non-empty value, that value is parsed using the same + grammar as `SET PATH` (with one restriction: the `PATH` keyword is not allowed inside the + conf value, since it would be self-referential). + + The conf value is validated for syntax at the time it is set; an invalid value is rejected. 
+ Static duplicates inside the conf are tolerated (unlike interactive `SET PATH`, which + rejects them) so a later `USE SCHEMA` cannot turn a previously valid default into a runtime + error. A `DEFAULT_PATH` token inside the conf value resolves to the Spark built-in default + below to avoid a cycle, rather than recursing. + + 2. If `spark.sql.defaultPath` is empty (the factory setting), the Spark built-in default + applies: `system.builtin`, `system.session`, and the current schema + (`current_catalog.current_schema`), in that order. + + To change the default path, set `spark.sql.defaultPath` via any of the usual mechanisms + (`SET spark.sql.defaultPath = ...` at runtime, `--conf` on `spark-submit`, `SparkConf`, or + `spark-defaults.conf`); clear it with `RESET spark.sql.defaultPath` to return to the + Spark built-in default. + +* **`SYSTEM_PATH`** + + Expands to the system-managed namespaces under the `system` catalog. Today this is just + `system.builtin`, but it is reserved for future system-managed schemas (for example, hosting + built-in AI, geospatial, or ML functions). + +* **`PATH`** + + Expands to the **current** value of the SQL Path. Useful for appending entries without + re-typing them, for example `SET PATH = PATH, spark_catalog.analytics`. + `PATH` is not allowed in the value of `spark.sql.defaultPath` (it would create a cycle). + +* **`CURRENT_SCHEMA`** / **`CURRENT_DATABASE`** + + A virtual marker that resolves to the catalog-qualified current schema + (`current_catalog.current_schema`) every time the path is consulted. This means subsequent + `USE SCHEMA` statements are picked up without re-issuing `SET PATH`. + `CURRENT_DATABASE` is a synonym for `CURRENT_SCHEMA`. + +* **`catalog_name . namespace [ . namespace ... ]`** + + An explicit catalog-qualified namespace reference (`catalog.schema` or, for catalogs with + multi-level namespaces, `catalog.ns1.ns2...`). At least two parts are required.
+ The catalog and namespace do not need to exist at the time of `SET PATH`; non-existent entries + are silently skipped during name resolution. + + Identifier quoting follows the usual rules. Backtick-quoted parts that contain a dot are + preserved, for example ``spark_catalog.`sch.b` ``. + +### Semantics + +* Setting the path takes effect immediately. +* Identifier case is preserved in storage and in `current_path()` output. +* Duplicate entries are detected after expansion and raise `DUPLICATE_SQL_PATH_ENTRY`. + Comparisons honor the session's case sensitivity setting. Because `CURRENT_DATABASE` is an + alias for `CURRENT_SCHEMA`, listing both is flagged as a duplicate. + +### Error conditions + +| Condition | Cause | +| :-------- | :---- | +| `UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED` | `SET PATH` was issued while `spark.sql.path.enabled` is `false`. | +| `INVALID_SQL_PATH_SCHEMA_REFERENCE` | An entry with fewer than two parts was given. | +| `DUPLICATE_SQL_PATH_ENTRY` | Two entries collapsed to the same concrete namespace after expansion. | + +### Examples + +```sql +-- Enable the feature first; the default is false. +> SET spark.sql.path.enabled = true; + +-- Observe the default path. +> SELECT current_path(); + system.builtin,system.session,spark_catalog.default + +-- Replace the path with explicit entries. +> SET PATH = spark_catalog.default, system.builtin; +> SELECT current_path(); + spark_catalog.default,system.builtin + +-- Identifier case is preserved. +> SET PATH = Spark_Catalog.Default, System.Builtin; +> SELECT current_path(); + Spark_Catalog.Default,System.Builtin + +-- Backtick-quoted parts that contain a dot round-trip with quoting. +> SET PATH = spark_catalog.`sch.b`, system.builtin; +> SELECT current_path(); + spark_catalog.`sch.b`,system.builtin + +-- DEFAULT_PATH and SYSTEM_PATH shortcuts. 
+> SET PATH = DEFAULT_PATH; +> SELECT current_path(); + system.builtin,system.session,spark_catalog.default +> SET PATH = SYSTEM_PATH; +> SELECT current_path(); + system.builtin + +-- SYSTEM_PATH composes naturally with the working schema. +> SET PATH = SYSTEM_PATH, CURRENT_SCHEMA; +> SELECT current_path(); + system.builtin,spark_catalog.default + +-- Append an entry by referring to the current path. +> SET PATH = spark_catalog.default, system.builtin; +> SET PATH = PATH, spark_catalog.analytics; +> SELECT current_path(); + spark_catalog.default,system.builtin,spark_catalog.analytics + +-- CURRENT_SCHEMA is re-evaluated each time; USE SCHEMA updates the effective path. +> SET PATH = CURRENT_SCHEMA, system.builtin; +> USE spark_catalog.finance; +> SELECT current_path(); + spark_catalog.finance,system.builtin +> USE spark_catalog.default; +> SELECT current_path(); + spark_catalog.default,system.builtin + +-- DEFAULT_PATH can be customized via the conf. +> SET spark.sql.defaultPath = system.session, system.builtin, current_schema; +> SET PATH = DEFAULT_PATH; +> SELECT current_path(); + system.session,system.builtin,spark_catalog.default +> RESET spark.sql.defaultPath; + +-- Append a schema of shared UDFs so callers do not have to qualify them. +> CREATE SCHEMA spark_catalog.shared_udfs; +> CREATE FUNCTION spark_catalog.shared_udfs.to_iso_date(d DATE) RETURNS STRING + RETURN date_format(d, 'yyyy-MM-dd'); +> SET PATH = PATH, spark_catalog.shared_udfs; +> SELECT to_iso_date(DATE'2026-05-22'); + 2026-05-22 + +-- Drop system.session from the path to force temporary objects to be qualified explicitly. +> CREATE TEMPORARY FUNCTION revenue() RETURNS INT RETURN 42; +> SELECT revenue(); -- resolves via the default path + 42 +> SET PATH = system.builtin, current_schema; +> SELECT revenue(); -- now must be qualified + [UNRESOLVED_ROUTINE] `revenue` ... +> SELECT session.revenue(); + 42 + +-- Error cases. 
+> SET PATH = spark_catalog.default, spark_catalog.default; + [DUPLICATE_SQL_PATH_ENTRY] + +> SET PATH = my_schema_no_catalog; + [INVALID_SQL_PATH_SCHEMA_REFERENCE] + +-- PATH is rejected as a value of the DEFAULT_PATH conf (would cycle). +> SET spark.sql.defaultPath = PATH, system.builtin; + [Error: invalid value] + +-- SET PATH is rejected when the feature is disabled. +> SET spark.sql.path.enabled = false; +> SET PATH = spark_catalog.default; + [UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED] +``` + +### Related Statements + +* [Name Resolution](sql-ref-name-resolution.html) +* [`current_path` function](sql-ref-function-current-path.html) +* [SET](sql-ref-syntax-aux-conf-mgmt-set.html) +* [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html) +* [USE DATABASE](sql-ref-syntax-ddl-usedb.html) diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-set.md b/docs/sql-ref-syntax-aux-conf-mgmt-set.md index 9e57a221f9688..396559ca48e74 100644 --- a/docs/sql-ref-syntax-aux-conf-mgmt-set.md +++ b/docs/sql-ref-syntax-aux-conf-mgmt-set.md @@ -25,6 +25,8 @@ The SET command sets a property, returns the value of an existing property or re To set SQL variables defined with [DECLARE VARIABLE](sql-ref-syntax-ddl-declare-variable.html) use [SET VAR](sql-ref-syntax-aux-set-var.html). +To change the session SQL Path used for unqualified name resolution use [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). 
+ ### Syntax ```sql @@ -72,3 +74,4 @@ SET spark.sql.variable.substitute; * [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html) * [SET VAR](sql-ref-syntax-aux-set-var.html) +* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) diff --git a/docs/sql-ref-syntax-aux-conf-mgmt.md b/docs/sql-ref-syntax-aux-conf-mgmt.md index 3312bcb503500..6b809d4a94655 100644 --- a/docs/sql-ref-syntax-aux-conf-mgmt.md +++ b/docs/sql-ref-syntax-aux-conf-mgmt.md @@ -22,3 +22,4 @@ license: | * [SET](sql-ref-syntax-aux-conf-mgmt-set.html) * [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html) * [SET TIME ZONE](sql-ref-syntax-aux-conf-mgmt-set-timezone.html) + * [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) diff --git a/docs/sql-ref-syntax-aux-describe-function.md b/docs/sql-ref-syntax-aux-describe-function.md index 0c5a3d751a564..2da1b9466fc23 100644 --- a/docs/sql-ref-syntax-aux-describe-function.md +++ b/docs/sql-ref-syntax-aux-describe-function.md @@ -22,9 +22,15 @@ license: | ### Description `DESCRIBE FUNCTION` statement returns the basic metadata information of an -existing function. The metadata information includes the function name, implementing -class and the usage details. If the optional `EXTENDED` option is specified, the basic -metadata information is returned along with the extended usage information. +existing function. For built-in and external (Java/Hive) functions the output includes the +function name, implementing class, and usage details. For +[SQL user-defined functions](sql-ref-syntax-ddl-create-sql-function.html) the output describes +the function signature (input parameters, return type/columns) and, with `EXTENDED`, the +function body, characteristics, and the frozen +[SQL Path](sql-ref-syntax-aux-conf-mgmt-set-path.html) that was captured at creation time. + +If the optional `EXTENDED` option is specified, the basic metadata is returned along with the +extended information. 
+ ### Syntax @@ -36,12 +42,14 @@ metadata information is returned along with the extended usage information. * **function_name** - Specifies a name of an existing function in the system. The function name may be - optionally qualified with a database name. If `function_name` is qualified with - a database then the function is resolved from the user specified database, otherwise - it is resolved from the current database. + Specifies a name of an existing function. The function name follows the regular + [name resolution](sql-ref-name-resolution.html#function-resolution) rules: unqualified + names walk the SQL Path; 3-part names target the named `catalog.schema` directly + (including the system namespaces `system.builtin` and `system.session`); 2-part names that + lead with `builtin` or `session` follow a mini-path across the system namespace and the + current catalog; other 2-part names are resolved in the current catalog. - **Syntax:** `[ database_name. ] function_name` + **Syntax:** `[ [ catalog_name. ] database_name. ] function_name` ### Examples @@ -102,6 +110,70 @@ DESC FUNCTION EXTENDED explode; | 10 | | 20 | +---------------------------------------------------------------+ + +-- Built-in functions can be qualified with `builtin` or `system.builtin`. +DESC FUNCTION system.builtin.abs; ++-------------------------------------------------------------------+ +|function_desc | ++-------------------------------------------------------------------+ +|Function: abs | +|Class: org.apache.spark.sql.catalyst.expressions.Abs | +|Usage: abs(expr) - Returns the absolute value of the numeric value.| ++-------------------------------------------------------------------+ + +-- Describe a SQL scalar UDF: the output uses the SQL function layout +-- (Function / Type / Input / Returns).
+CREATE FUNCTION area(x DOUBLE, y DOUBLE) RETURNS DOUBLE RETURN x * y; +DESC FUNCTION area; ++-------------------------------+ +|function_desc | ++-------------------------------+ +|Function: spark_catalog.default.area| +|Type: SCALAR | +|Input: x DOUBLE | +| y DOUBLE | +|Returns: DOUBLE | ++-------------------------------+ + +-- Describe a SQL table UDF. +CREATE FUNCTION getemps(deptno INT) + RETURNS TABLE (id INT, name STRING) + RETURN SELECT id, name FROM employee WHERE employee.deptno = getemps.deptno; +DESC FUNCTION getemps; ++--------------------------------------+ +|function_desc | ++--------------------------------------+ +|Function: spark_catalog.default.getemps| +|Type: TABLE | +|Input: deptno INT | +|Returns: id INT | +| name STRING | ++--------------------------------------+ + +-- DESC FUNCTION EXTENDED for a SQL UDF adds the body, the characteristic clauses, +-- the captured SQL configs, the owner, the create time, and the frozen SQL Path. +SET PATH = spark_catalog.default, system.builtin; +CREATE FUNCTION frozen_fn() RETURNS INT + COMMENT 'demo function' + RETURN (SELECT MAX(id) FROM frozen_t); +DESC FUNCTION EXTENDED frozen_fn; ++-----------------------------------------------------------------+ +|function_desc | ++-----------------------------------------------------------------+ +|Function: spark_catalog.default.frozen_fn | +|Type: SCALAR | +|Input: () | +|Returns: INT | +|Comment: demo function | +|Deterministic:false | +|Data Access: READS SQL DATA | +|Configs: spark.sql.ansi.enabled=true | +| ... 
| +|Owner: | +|Create Time: Wed Apr 30 14:05:43 PDT 2026 | +|Body: (SELECT MAX(id) FROM frozen_t) | +|SQL Path: spark_catalog.default, system.builtin | ++-----------------------------------------------------------------+ ``` ### Related Statements @@ -109,3 +181,5 @@ DESC FUNCTION EXTENDED explode; * [DESCRIBE DATABASE](sql-ref-syntax-aux-describe-database.html) * [DESCRIBE TABLE](sql-ref-syntax-aux-describe-table.html) * [DESCRIBE QUERY](sql-ref-syntax-aux-describe-query.html) +* [CREATE FUNCTION (SQL)](sql-ref-syntax-ddl-create-sql-function.html) +* [Name Resolution](sql-ref-name-resolution.html) diff --git a/docs/sql-ref-syntax-aux-describe-table.md b/docs/sql-ref-syntax-aux-describe-table.md index 46d9432f5d072..cb84b0c7fefb2 100644 --- a/docs/sql-ref-syntax-aux-describe-table.md +++ b/docs/sql-ref-syntax-aux-describe-table.md @@ -105,6 +105,10 @@ to return the metadata pertaining to a partition or column respectively. "view_schema_mode": "", "view_catalog_and_namespace": "", "view_query_output_columns": ["col1", "col2"], + // SQL Path captured at the time of permanent view creation + "sql_path": [ + {"catalog_name": "", "namespace": [""]} + ], // Spark SQL configurations captured at the time of permanent view creation "view_creation_spark_configuration": { "conf1": "", @@ -272,8 +276,83 @@ DESCRIBE customer salesdb.customer.name; +---------+----------+ -- Returns the table metadata in JSON format. +-- (Formatted for readability; the actual output is on a single line.) 
DESC FORMATTED customer AS JSON; -{"table_name":"customer","catalog_name":"spark_catalog","schema_name":"default","namespace":["default"],"columns":[{"name":"cust_id","type":{"name":"integer"},"nullable":true},{"name":"name","type":{"name":"string"},"comment":"Short name","nullable":true},{"name":"state","type":{"name":"varchar","length":20},"nullable":true}],"location": "file:/tmp/salesdb.db/custom...","created_time":"2020-04-07T14:05:43Z","last_access":"UNKNOWN","created_by":"None","type":"MANAGED","provider":"parquet","partition_provider":"Catalog","partition_columns":["state"]} +{ + "table_name": "customer", + "catalog_name": "spark_catalog", + "schema_name": "default", + "namespace": ["default"], + "columns": [ + {"name": "cust_id", "type": {"name": "int"}, "nullable": true}, + {"name": "name", "type": {"name": "string"}, "comment": "Short name", "nullable": true}, + {"name": "state", "type": {"name": "varchar", "length": 20}, "nullable": true} + ], + "location": "file:/tmp/salesdb.db/custom...", + "created_time": "2020-04-07T14:05:43Z", + "last_access": "UNKNOWN", + "created_by": "None", + "type": "MANAGED", + "provider": "parquet", + "partition_provider": "Catalog", + "partition_columns": ["state"] +} + +-- DESCRIBE EXTENDED on a view emits view-specific rows. +SET PATH = spark_catalog.default, system.builtin; +CREATE VIEW recent_customers AS + SELECT cust_id, name FROM customer WHERE cust_id > 1000; + +DESCRIBE EXTENDED recent_customers; ++----------------------------+---------------------------------------+--------+ +| col_name| data_type| comment| ++----------------------------+---------------------------------------+--------+ +| cust_id| int| null| +| name| string| null| +| | | | +|# Detailed Table Information| | | +| Catalog | spark_catalog| | +| Database| default| | +| Table| recent_customers| | +| Type| VIEW| | +| View Text|SELECT cust_id, name FROM customer ... | | +| View Original Text|SELECT cust_id, name FROM customer ... 
| | +| View Schema Mode| COMPENSATION| | +| View Catalog and Namespace| spark_catalog.default | | +| View Query Output Columns| [`cust_id`, `name`] | | +| SQL Path| spark_catalog.default, system.builtin| | ++----------------------------+---------------------------------------+--------+ + +-- The same metadata in JSON form. +-- (Formatted for readability; the actual output is on a single line.) +DESCRIBE EXTENDED recent_customers AS JSON; +{ + "table_name": "recent_customers", + "catalog_name": "spark_catalog", + "schema_name": "default", + "namespace": ["default"], + "columns": [ + {"name": "cust_id", "type": {"name": "int"}, "nullable": true}, + {"name": "name", "type": {"name": "string", "collation": "UTF8_BINARY"}, "nullable": true} + ], + "created_time": "2026-05-22T10:00:00Z", + "last_access": "UNKNOWN", + "created_by": "Spark 4.2.0", + "type": "VIEW", + "collation": "UTF8_BINARY", + "view_text": "SELECT cust_id, name FROM customer WHERE cust_id > 1000", + "view_original_text": "SELECT cust_id, name FROM customer WHERE cust_id > 1000", + "view_schema_mode": "COMPENSATION", + "view_catalog_and_namespace": "spark_catalog.default", + "view_query_output_columns": ["cust_id", "name"], + "sql_path": [ + {"catalog_name": "spark_catalog", "namespace": ["default"]}, + {"catalog_name": "system", "namespace": ["builtin"]} + ], + "view_creation_spark_configuration": { + "spark.sql.ansi.enabled": "true" + } +} ``` ### Related Statements @@ -281,3 +360,4 @@ DESC FORMATTED customer AS JSON; * [DESCRIBE DATABASE](sql-ref-syntax-aux-describe-database.html) * [DESCRIBE QUERY](sql-ref-syntax-aux-describe-query.html) * [DESCRIBE FUNCTION](sql-ref-syntax-aux-describe-function.html) +* [Name Resolution](sql-ref-name-resolution.html) diff --git a/docs/sql-ref-syntax-ddl-create-database.md b/docs/sql-ref-syntax-ddl-create-database.md index 9d8bf47844724..9125ca78dc9ee 100644 --- a/docs/sql-ref-syntax-ddl-create-database.md +++ b/docs/sql-ref-syntax-ddl-create-database.md @@ -38,6 
+38,9 @@ CREATE { DATABASE | SCHEMA } [ IF NOT EXISTS ] database_name Specifies the name of the database to be created. + > Note: avoid naming a database `session` or `builtin`; see + > [Reserved system names](sql-ref-identifier.html#reserved-system-names). + * **IF NOT EXISTS** Creates a database with the given name if it does not exist. If a database with the same name already exists, nothing will happen. @@ -85,3 +88,4 @@ DESCRIBE DATABASE EXTENDED customer_db; * [DESCRIBE DATABASE](sql-ref-syntax-aux-describe-database.html) * [DROP DATABASE](sql-ref-syntax-ddl-drop-database.html) +* [Name Resolution](sql-ref-name-resolution.html) diff --git a/docs/sql-ref-syntax-ddl-create-function.md b/docs/sql-ref-syntax-ddl-create-function.md index e0e2545f5ee3f..2565870494410 100644 --- a/docs/sql-ref-syntax-ddl-create-function.md +++ b/docs/sql-ref-syntax-ddl-create-function.md @@ -50,8 +50,9 @@ CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ] * **TEMPORARY** Indicates the scope of function being created. When `TEMPORARY` is specified, the - created function is valid and visible in the current session. No persistent - entry is made in the catalog for these kind of functions. + created function is valid and visible in the current session. Temporary functions live in the + per-session `system.session` namespace. No persistent entry is made in the catalog for these + kinds of functions. * **IF NOT EXISTS** @@ -62,9 +63,19 @@ CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ] * **function_name** - Specifies a name of function to be created. The function name may be optionally qualified with a database name. + Specifies the name of the function to be created. - **Syntax:** `[ database_name. ] function_name` + * For a **permanent** function the name may be optionally qualified with a database name + (or a catalog and database). If the name is not qualified the function is created in the + current schema. + + **Syntax:** `[ catalog_name. ] [ database_name. 
] function_name` + + * For a **temporary** function the name may be optionally qualified with the session schema + (`session` or `system.session`). Any other qualifier is rejected with + `INVALID_TEMP_OBJ_QUALIFIER`. + + **Syntax:** `[ { session | system.session } . ] function_name` * **class_name** diff --git a/docs/sql-ref-syntax-ddl-create-sql-function.md b/docs/sql-ref-syntax-ddl-create-sql-function.md index 649cd895a1974..19f3e120f070f 100644 --- a/docs/sql-ref-syntax-ddl-create-sql-function.md +++ b/docs/sql-ref-syntax-ddl-create-sql-function.md @@ -58,7 +58,10 @@ characteristic - **TEMPORARY** - The scope of the function being created. When you specify `TEMPORARY`, the created function is valid and visible in the current session. No persistent entry is made in the catalog. + The scope of the function being created. When you specify `TEMPORARY`, the created function is + valid and visible in the current session. Temporary functions live in the per-session + `system.session` namespace and are dropped when the session ends. No persistent entry is made in + the catalog. - **IF NOT EXISTS** @@ -66,10 +69,23 @@ characteristic - **function_name** - A name for the function. For a permanent function, you can optionally qualify the function name, or it will be created under the current catalog and namespace. - If the name is not qualified the permanent function is created in the current schema. + A name for the function. - **Syntax:** `[ database_name. ] function_name` + * For a **permanent** function, you can optionally qualify the function name with a database name + (or a catalog and database). If the name is not qualified the permanent function is created in + the current schema. + + **Syntax:** `[ catalog_name. ] [ database_name. ] function_name` + + * For a **temporary** function, you can optionally qualify the function name with the session + schema (`session` or `system.session`). 
Any other qualifier — including + `system.builtin`, the current schema, or an arbitrary database name — is rejected with + `INVALID_TEMP_OBJ_QUALIFIER`. For example, `CREATE TEMPORARY FUNCTION session.f ...` and + `CREATE TEMPORARY FUNCTION system.session.f ...` are accepted. + + **Syntax:** `[ { session | system.session } . ] function_name` + + The function name must be unique among all routines (procedures and functions) in its schema. - **function_parameter** @@ -126,6 +142,15 @@ characteristic - [Ranking functions](sql-ref-functions-builtin.md#ranking-window-functions) - Row producing functions such as `explode` + A persistent SQL UDF cannot reference temporary views, temporary functions, or session + variables. + + The SQL Path in effect at `CREATE FUNCTION` time is captured into the function's metadata; the + body resolves against that frozen path on every invocation, not the invoker's current path. + `current_schema()` and `current_path()` inside the body still return the invoker's context. + Use [DESCRIBE FUNCTION EXTENDED](sql-ref-syntax-aux-describe-function.html) to inspect the + captured path. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). + Within the body of the function you can refer to parameter by its unqualified name or by qualifying the parameter with the function name. - **characteristic** @@ -296,8 +321,74 @@ characteristic Returns: INT ``` +### Create a temporary SQL function with a session qualifier + +```sql +-- Unqualified, `session`-qualified, and `system.session`-qualified names all create the same +-- temporary function in the per-session `system.session` namespace. 
+> CREATE TEMPORARY FUNCTION add_one(x INT) RETURNS INT RETURN x + 1; + +> CREATE OR REPLACE TEMPORARY FUNCTION session.add_one(x INT) RETURNS INT + RETURN x + 1; + +> CREATE OR REPLACE TEMPORARY FUNCTION system.session.add_one(x INT) RETURNS INT + RETURN x + 1; + +-- All three names refer to the same temporary function: +> SELECT add_one(1), session.add_one(1), system.session.add_one(1); + 2 2 2 + +-- DROP TEMPORARY FUNCTION accepts the same qualifiers: +> DROP TEMPORARY FUNCTION session.add_one; + +-- Any other qualifier on a TEMPORARY function is rejected. +> CREATE TEMPORARY FUNCTION mydb.bad_temp() RETURNS INT RETURN 1; + [INVALID_TEMP_OBJ_QUALIFIER] qualifier `mydb` is not allowed for temporary FUNCTION ... + +> CREATE TEMPORARY FUNCTION system.builtin.bad_temp() RETURNS INT RETURN 1; + [INVALID_TEMP_OBJ_QUALIFIER] qualifier `system`.`builtin` is not allowed for temporary FUNCTION ... +``` + +### Frozen SQL Path + +A SQL UDF captures the SQL Path that is in effect at `CREATE FUNCTION` time. The body resolves +against that frozen path on every invocation, even if the caller's session has set a different +PATH. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). + +```sql +> CREATE SCHEMA path_a; +> CREATE SCHEMA path_b; +> CREATE TABLE path_a.t USING parquet AS SELECT 10 AS id; +> CREATE TABLE path_b.t USING parquet AS SELECT 20 AS id; + +-- The PATH at CREATE FUNCTION time points at path_a, so unqualified `t` in the body binds to +-- path_a.t. +> SET PATH = spark_catalog.path_a, system.builtin; +> CREATE FUNCTION default.frozen_fn() RETURNS INT + RETURN (SELECT MAX(id) FROM t); + +-- Flip the live PATH. The function body still resolves `t` against the frozen path. 
+> SET PATH = spark_catalog.path_b, system.builtin; + +-- A bare query follows the LIVE path: +> SELECT MAX(id) FROM t; + 20 + +-- The function body follows its FROZEN path: +> SELECT default.frozen_fn(); + 10 + +-- DESCRIBE FUNCTION EXTENDED shows the captured path: +> DESC FUNCTION EXTENDED default.frozen_fn; + Function: spark_catalog.default.frozen_fn + ... + SQL Path: spark_catalog.path_a, system.builtin +``` + ### Related Statements * [SHOW FUNCTIONS](sql-ref-syntax-aux-show-functions.html) * [DESCRIBE FUNCTION](sql-ref-syntax-aux-describe-function.html) * [DROP FUNCTION](sql-ref-syntax-ddl-drop-function.html) +* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) +* [Name Resolution](sql-ref-name-resolution.html) diff --git a/docs/sql-ref-syntax-ddl-create-view.md b/docs/sql-ref-syntax-ddl-create-view.md index 2d832636b38fc..f6fc6c0e85c75 100644 --- a/docs/sql-ref-syntax-ddl-create-view.md +++ b/docs/sql-ref-syntax-ddl-create-view.md @@ -40,9 +40,11 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident * **[ GLOBAL ] TEMPORARY** - TEMPORARY views are session-scoped and will be dropped when session ends - because it skips persisting the definition in the underlying metastore, if any. - GLOBAL TEMPORARY views are tied to a system preserved temporary database `global_temp`. + `TEMPORARY` views are session-scoped and are dropped when the session ends; + no entry is persisted in the underlying metastore. + Temporary views live in the per-session `system.session` namespace. + + `GLOBAL TEMPORARY` views are tied to the system-preserved temporary database `global_temp`. * **IF NOT EXISTS** @@ -51,9 +53,23 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident * **view_identifier** - Specifies a view name, which may be optionally qualified with a database name. + Specifies a view name. + + * For a **persistent** view the name may be optionally qualified with a database name (or a + catalog and database). 
If the name is not qualified the view is created in the current + schema. + + **Syntax:** `[ catalog_name. ] [ database_name. ] view_name` - **Syntax:** `[ database_name. ] view_name` + * For a **temporary** view the name may be optionally qualified with the session schema + (`session` or `system.session`). Any other qualifier is rejected with + `INVALID_TEMP_OBJ_QUALIFIER`. For example, `CREATE TEMPORARY VIEW session.v ...` and + `CREATE TEMPORARY VIEW system.session.v ...` are accepted; `CREATE TEMPORARY VIEW mydb.v ...` + is not. + + **Syntax:** `[ { session | system.session } . ] view_name` + + The fully qualified view name must be unique within its schema. * **create_view_clauses** @@ -75,8 +91,16 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident The default is `WITH SCHEMA COMPENSATION`. * **query** + A [SELECT](sql-ref-syntax-qry-select.html) statement that constructs the view from base tables or other views. + A persistent view cannot reference temporary views, temporary functions, or session variables. + + For a persistent view, the SQL Path in effect at `CREATE VIEW` time is captured into the view's + metadata; the body resolves against that frozen path on every reference, not the invoker's + current path. Use [DESCRIBE EXTENDED](sql-ref-syntax-aux-describe-table.html) to inspect the + captured path. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). + ### Examples ```sql @@ -98,8 +122,74 @@ CREATE OR REPLACE VIEW open_orders WITH SCHEMA EVOLUTION AS SELECT * FROM orders WHERE status = 'open'; ``` +### Create a temporary view with a session qualifier + +```sql +-- Unqualified, `session`-qualified, and `system.session`-qualified names all create the same +-- temporary view in the per-session `system.session` namespace. 
+CREATE TEMPORARY VIEW recent_orders + AS SELECT * FROM orders WHERE order_date > current_date - INTERVAL 7 DAYS; + +CREATE OR REPLACE TEMPORARY VIEW session.recent_orders + AS SELECT * FROM orders WHERE order_date > current_date - INTERVAL 7 DAYS; + +CREATE OR REPLACE TEMPORARY VIEW system.session.recent_orders + AS SELECT * FROM orders WHERE order_date > current_date - INTERVAL 7 DAYS; + +-- All three names address the same temporary view: +SELECT count(*) FROM recent_orders; +SELECT count(*) FROM session.recent_orders; +SELECT count(*) FROM system.session.recent_orders; + +-- DROP VIEW accepts the same qualifiers (there is no DROP TEMPORARY VIEW form): +DROP VIEW session.recent_orders; + +-- Any other qualifier on a TEMPORARY view is rejected. +CREATE TEMPORARY VIEW mydb.bad_temp AS SELECT 1; + [INVALID_TEMP_OBJ_QUALIFIER] qualifier `mydb` is not allowed for temporary VIEW ... + +CREATE TEMPORARY VIEW system.builtin.bad_temp AS SELECT 1; + [INVALID_TEMP_OBJ_QUALIFIER] qualifier `system`.`builtin` is not allowed for temporary VIEW ... +``` + +### Frozen SQL Path + +A persistent view captures the SQL Path that is in effect at `CREATE VIEW` time. The view body +resolves against that frozen path on every reference, even when the caller's session has set a +different PATH. See [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html). + +```sql +> CREATE SCHEMA views_a; +> CREATE SCHEMA views_b; +> CREATE TABLE views_a.t USING parquet AS SELECT 1 AS id; +> CREATE TABLE views_b.t USING parquet AS SELECT 2 AS id; + +-- The PATH at CREATE VIEW time points at views_a, so unqualified `t` in the view body binds to +-- views_a.t. +> SET PATH = spark_catalog.views_a, system.builtin; +> CREATE VIEW default.v_frozen AS SELECT id FROM t; + +-- Flip the live PATH. The view body still resolves `t` against the frozen path. 
+> SET PATH = spark_catalog.views_b, system.builtin; + +-- A bare query follows the LIVE path: +> SELECT id FROM t; + 2 + +-- The view body follows its FROZEN path: +> SELECT id FROM default.v_frozen; + 1 + +-- DESCRIBE EXTENDED shows the captured path: +> DESCRIBE EXTENDED default.v_frozen; + ... + SQL Path spark_catalog.views_a, system.builtin +``` + ### Related Statements * [ALTER VIEW](sql-ref-syntax-ddl-alter-view.html) * [DROP VIEW](sql-ref-syntax-ddl-drop-view.html) * [SHOW VIEWS](sql-ref-syntax-aux-show-views.html) +* [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) +* [Name Resolution](sql-ref-name-resolution.html) diff --git a/docs/sql-ref-syntax-ddl-drop-function.md b/docs/sql-ref-syntax-ddl-drop-function.md index bef31d74afcff..b9272e34b81d6 100644 --- a/docs/sql-ref-syntax-ddl-drop-function.md +++ b/docs/sql-ref-syntax-ddl-drop-function.md @@ -34,14 +34,18 @@ DROP [ TEMPORARY ] FUNCTION [ IF EXISTS ] function_name * **function_name** - Specifies the name of an existing function. The function name may be - optionally qualified with a database name. + Specifies the name of an existing function. With `TEMPORARY`, the name may optionally be + qualified with `session` or `system.session`. Without `TEMPORARY`, the name may optionally be + qualified with a database (or a catalog and database) and resolves to a persistent function. - **Syntax:** `[ database_name. ] function_name` + **Syntax:** `[ catalog_name. ] [ database_name. ] function_name` + + Functions in `system.builtin` cannot be dropped. * **TEMPORARY** - Should be used to delete the `TEMPORARY` function. + Required to drop a temporary function. Without `TEMPORARY`, `DROP FUNCTION` only considers + persistent functions. 
* **IF EXISTS** diff --git a/docs/sql-ref-syntax-ddl-drop-view.md b/docs/sql-ref-syntax-ddl-drop-view.md index 5b680d7f907e0..16f711a9074eb 100644 --- a/docs/sql-ref-syntax-ddl-drop-view.md +++ b/docs/sql-ref-syntax-ddl-drop-view.md @@ -37,9 +37,11 @@ DROP VIEW [ IF EXISTS ] view_identifier * **view_identifier** - Specifies the view name to be dropped. The view name may be optionally qualified with a database name. + Specifies the view name to be dropped. The name may be optionally qualified with a database + name (or a catalog and database). A name qualified with `session` or `system.session` + targets a temporary view. - **Syntax:** `[ database_name. ] view_name` + **Syntax:** `[ catalog_name. ] [ database_name. ] view_name` ### Examples @@ -53,12 +55,20 @@ DROP VIEW userdb.employeeView; -- Assumes a view named `employeeView` does not exist. -- Throws exception DROP VIEW employeeView; -Error: org.apache.spark.sql.AnalysisException: Table or view not found: employeeView; -(state=,code=0) +Error: TABLE_OR_VIEW_NOT_FOUND -- Assumes a view named `employeeView` does not exist,Try with IF EXISTS -- this time it will not throw exception DROP VIEW IF EXISTS employeeView; + +-- A temporary view that shadows a persistent view with the same name. +-- An unqualified DROP VIEW drops the temporary view first; qualifying with `session` +-- always targets the temporary view explicitly. 
+CREATE VIEW default.recent_orders AS SELECT * FROM orders WHERE order_date > current_date - 7; +CREATE TEMPORARY VIEW recent_orders AS SELECT * FROM orders WHERE order_date = current_date; + +DROP VIEW session.recent_orders; -- drops the temporary view +DROP VIEW default.recent_orders; -- drops the persistent view ``` ### Related Statements diff --git a/docs/sql-ref-syntax-qry-select-join.md b/docs/sql-ref-syntax-qry-select-join.md index 698884dc28b57..646297831d1cc 100644 --- a/docs/sql-ref-syntax-qry-select-join.md +++ b/docs/sql-ref-syntax-qry-select-join.md @@ -26,7 +26,7 @@ A SQL join is used to combine rows from two relations based on join criteria. Th ### Syntax ```sql -relation { [ join_type ] JOIN [ LATERAL ] relation [ join_criteria ] | NATURAL join_type JOIN [ LATERAL ] relation } +relation { [ join_type ] JOIN [ LATERAL ] relation [ join_criteria | nearest_by_clause ] | NATURAL join_type JOIN [ LATERAL ] relation } ``` ### Parameters @@ -53,6 +53,30 @@ relation { [ join_type ] JOIN [ LATERAL ] relation [ join_criteria ] | NATURAL j Specifies an expression with a return type of boolean. +* **nearest_by_clause** + + Specifies a nearest-by top-K ranking join. For each row on the left (query side), returns up to `num_results` rows from the right (base side), ranked by `ranking_expression`. Only `INNER` (the default) and `LEFT OUTER` join types are supported with this clause. + + **Syntax:** `{ APPROX | EXACT } NEAREST [ num_results ] BY { DISTANCE | SIMILARITY } ranking_expression` + + `APPROX | EXACT` + + Controls the search algorithm contract. `APPROX` allows the optimizer to use faster approximate strategies (such as indexed nearest-neighbor search when available). `EXACT` forces brute-force evaluation. + + `num_results` + + A positive integer literal between 1 and 100000 that limits the number of matches per left row. Defaults to 1 when omitted. + + `DISTANCE | SIMILARITY` + + `DISTANCE` ranks rows by smallest value of `ranking_expression` first. 
`SIMILARITY` ranks rows by largest value first. Matched right-side rows are emitted in best-first order: smallest ranking value first under `DISTANCE`, largest first under `SIMILARITY`. (Downstream operators may reorder; add an explicit `ORDER BY` if you need to lock in the ordering.) + + `ranking_expression` + + A scalar expression that returns an orderable type. The expression is evaluated once per (left, right) pair on the brute-force path, so avoid expensive or side-effecting UDFs in ranking expressions. + + **Performance note.** The current implementation evaluates the full cross-product of the left and right sides and bounds memory per left row by `num_results`. Per-query work is `O(|left| × |right| × log num_results)`. Index-backed approximate strategies (transparent to `APPROX` queries) are planned in a future release; until then, pre-filter the right side (e.g. via a subquery) when it is large. + ### Join Types #### **Inner Join** diff --git a/docs/sql-ref-syntax.md b/docs/sql-ref-syntax.md index d8c37dc021985..1e0ea4a2b8d64 100644 --- a/docs/sql-ref-syntax.md +++ b/docs/sql-ref-syntax.md @@ -29,7 +29,8 @@ Data Definition Statements are used to create or modify the structure of databas * [ALTER TABLE](sql-ref-syntax-ddl-alter-table.html) * [ALTER VIEW](sql-ref-syntax-ddl-alter-view.html) * [CREATE DATABASE](sql-ref-syntax-ddl-create-database.html) - * [CREATE FUNCTION](sql-ref-syntax-ddl-create-function.html) + * [CREATE FUNCTION (External)](sql-ref-syntax-ddl-create-function.html) + * [CREATE FUNCTION (SQL)](sql-ref-syntax-ddl-create-sql-function.html) * [CREATE TABLE](sql-ref-syntax-ddl-create-table.html) * [CREATE VIEW](sql-ref-syntax-ddl-create-view.html) * [DECLARE VARIABLE](sql-ref-syntax-ddl-declare-variable.html) @@ -123,6 +124,7 @@ You use SQL scripting to execute procedural logic in SQL. 
* [REFRESH FUNCTION](sql-ref-syntax-aux-cache-refresh-function.html) * [RESET](sql-ref-syntax-aux-conf-mgmt-reset.html) * [SET](sql-ref-syntax-aux-conf-mgmt-set.html) + * [SET PATH](sql-ref-syntax-aux-conf-mgmt-set-path.html) * [SET VAR](sql-ref-syntax-aux-set-var.html) * [SHOW COLLATIONS](sql-ref-syntax-aux-show-collations.html) * [SHOW COLUMNS](sql-ref-syntax-aux-show-columns.html) diff --git a/docs/web-ui.md b/docs/web-ui.md index 3889b41f03a04..6ae0a363d1873 100644 --- a/docs/web-ui.md +++ b/docs/web-ui.md @@ -28,6 +28,31 @@ to monitor the status and resource consumption of your Spark cluster. * This will become a table of contents (this text will be scraped). {:toc} +## Overview + +The Web UI is built into every Spark application: while the application is +running, it serves a set of web pages that let you inspect what is happening +inside it. Typical uses include monitoring a running job, diagnosing a +failure, analyzing the execution plan of a slow SQL query, and checking how +memory and tasks are distributed across executors. + +By default the Web UI is available at `http://<driver-node>:4040`. When that +port is already in use (for example, when several Spark applications run on +the same host), Spark tries `4041`, `4042`, and so on until it finds a free +port, and logs the chosen port at startup. You can override the default port +with `spark.ui.port`, and tune other UI behavior through the `spark.ui.*` +properties documented in the [Configuration](configuration.html#spark-ui) +reference. + +The Web UI is tied to the lifetime of the application: once it exits, the UI +is no longer reachable. To inspect an application after it has finished, +enable event logging and run the Spark History Server, which reconstructs an +equivalent UI from the persisted event log; see +[Monitoring and Instrumentation](monitoring.html) for setup details. + +The remaining sections walk through each tab in the Web UI's top navigation +bar. 
+ ## Jobs Tab The Jobs tab displays a summary page of all jobs in the Spark application and a details page for each job. The summary page shows high-level information, such as the status, duration, and @@ -35,64 +60,33 @@ progress of all jobs and the overall event timeline. When you click on a job on page, you see the details page for that job. The details page further shows the event timeline, DAG visualization, and all stages of the job. -The information that is displayed in this section is -* User: Current Spark user -* Started At: The startup time of Spark application -* Total uptime: Time since Spark application started +The information displayed at the top of the page includes: + * Scheduling mode: See [job scheduling](job-scheduling.html#configuring-pool-properties) * Number of jobs per status: Active, Completed, Failed - -

    - Basic info -

    - * Event timeline: Displays in chronological order the events related to the executors (added, removed) and the jobs - -

    - Event timeline -

    - * Details of jobs grouped by status: Displays detailed information of the jobs including Job ID, description (with a link to detailed job page), submitted time, duration, stages summary and tasks progress bar +The current user, application start time, and total uptime are shown in the footer at the +bottom of every page. +

    - Details of jobs grouped by status + All Jobs page

    - -When you click on a specific job, you can see the detailed information of this job. - ### Jobs detail This page displays the details of a specific job identified by its job ID. + * Job Status: (running, succeeded, failed) * Number of stages per status (active, pending, completed, skipped, failed) -* Associated SQL Query: Link to the sql tab for this job +* Associated SQL Query: Link to the SQL tab for this job * Event timeline: Displays in chronological order the events related to the executors (added, removed) and the stages of the job +* DAG visualization: Visual representation of the directed acyclic graph of this job where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied on RDD +* List of stages (grouped by state active, pending, completed, skipped, and failed), with columns including Stage ID, description, submitted timestamp, duration, tasks progress bar, **Input** (bytes read from storage), **Output** (bytes written to storage), **Shuffle read** (total shuffle bytes and records read locally and from remote executors), and **Shuffle write** (bytes and records written to disk for a future shuffle)

    - Event timeline -

    - -* DAG visualization: Visual representation of the directed acyclic graph of this job where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied on RDD. -* An example of DAG visualization for `sc.parallelize(1 to 100).toDF.count()` - -

    - DAG -

    - -* List of stages (grouped by state active, pending, completed, skipped, and failed) - * Stage ID - * Description of the stage - * Submitted timestamp - * Duration of the stage - * Tasks progress bar - * Input: Bytes read from storage in this stage - * Output: Bytes written in storage in this stage - * Shuffle read: Total shuffle bytes and records read, includes both data read locally and data read from remote executors - * Shuffle write: Bytes and records written to disk in order to be read by a shuffle in a future stage - -

    - DAG + Job detail page

    ## Stages Tab @@ -100,41 +94,36 @@ This page displays the details of a specific job identified by its job ID. The Stages tab displays a summary page that shows the current state of all stages of all jobs in the Spark application. -At the beginning of the page is the summary with the count of all stages by status (active, pending, completed, skipped, and failed) +At the top of the page is a summary with the count of all stages by status (active, pending, +completed, skipped, and failed). In [Fair scheduling mode](job-scheduling.html#scheduling-within-an-application) +a table of [pool properties](job-scheduling.html#configuring-pool-properties) is also shown. -

    - Stages header -

    - -In [Fair scheduling mode](job-scheduling.html#scheduling-within-an-application) there is a table that displays [pools properties](job-scheduling.html#configuring-pool-properties) +Below the summary are the stages, grouped by status (active, pending, completed, skipped, failed). +An active stage shows a small **(kill)** link next to its description; clicking it asks Spark +to cancel that stage. Only failed stages show the failure reason. Click a stage's description +to open its [Stage detail](#stage-detail) page.

    - Pool properties -

    - -After that are the details of stages per status (active, pending, completed, skipped, failed). In active stages, it's possible to kill the stage with the kill link. Only in failed stages, failure reason is shown. Task detail can be accessed by clicking on the description. - -

    - Stages detail + Stages tab

    ### Stage detail -The stage detail page begins with information like total time across all tasks, [Locality level summary](tuning.html#data-locality), [Shuffle Read Size / Records](rdd-programming-guide.html#shuffle-operations) and Associated Job IDs. -

    - Stage header -

    +The stage detail page begins with information like total time across all tasks, +[Locality level summary](tuning.html#data-locality), +[Shuffle Read Size / Records](rdd-programming-guide.html#shuffle-operations) and Associated Job IDs. -There is also a visual representation of the directed acyclic graph (DAG) of this stage, where vertices represent the RDDs or DataFrames and the edges represent an operation to be applied. -Nodes are grouped by operation scope in the DAG visualization and labelled with the operation scope name (BatchScan, WholeStageCodegen, Exchange, etc). -Notably, Whole Stage Code Generation operations are also annotated with the code generation id. For stages belonging to Spark DataFrame or SQL execution, this allows to cross-reference Stage execution details to the relevant details in the Web-UI SQL Tab page where SQL plan graphs and execution plans are reported. +It also shows a visual representation of the directed acyclic graph (DAG) of this stage, +where vertices represent the RDDs or DataFrames and the edges represent an operation to be +applied. Nodes are grouped by operation scope in the DAG visualization and labelled with the +operation scope name (`BatchScan`, `WholeStageCodegen`, `Exchange`, etc). +Notably, whole-stage code generation operations are also annotated with the code generation id. +For stages belonging to Spark DataFrame or SQL execution, this allows you to cross-reference +stage execution details to the relevant query in the [SQL Tab](#sql-tab). -

    - Stage DAG -

    +Summary metrics for all tasks are represented in a table and in a timeline: -Summary metrics for all task are represented in a table and in a timeline. -* **[Tasks deserialization time](configuration.html#compression-and-serialization)** +* **Task deserialization time** is the time spent deserializing the task closure on an executor before it can run. * **Duration of tasks**. * **GC time** is the total JVM garbage collection time. * **Result serialization time** is the time spent serializing the task result on an executor before sending it back to the driver. @@ -148,26 +137,14 @@ Summary metrics for all task are represented in a table and in a timeline. * **Shuffle spill (memory)** is the size of the deserialized form of the shuffled data in memory. * **Shuffle spill (disk)** is the size of the serialized form of the data on disk. -

    - Stages metrics -

    - -Aggregated metrics by executor show the same information aggregated by executor. - -

    - Stages metrics per executors -

    - -**[Accumulators](rdd-programming-guide.html#accumulators)** are a type of shared variables. It provides a mutable variable that can be updated inside of a variety of transformations. It is possible to create accumulators with and without name, but only named accumulators are displayed. +The same metrics are also shown aggregated by executor. +**[Accumulators](rdd-programming-guide.html#accumulators)** are shared variables that can be +updated inside transformations; only named accumulators are displayed here. Finally, a tasks +table shows the same information broken down per task, with links to executor logs and the task +attempt number for failures.

    - Stage accumulator -

    - -Tasks details basically includes the same information as in the summary section but detailed by task. It also includes links to review the logs and the task attempt number if it fails for any reason. If there are named accumulators, here it is possible to see the accumulator value at the end of each task. - -

    - Tasks + Stage detail

    ## Storage Tab @@ -224,8 +201,11 @@ distribution on the cluster. ## Environment Tab -The Environment tab displays the values for the different environment and configuration variables, -including JVM, Spark, and system properties. + +The Environment tab is the place to verify that your Spark application is +running with the configuration you expect. It groups the environment and +configuration information into a set of sub-tabs along the left side of the +page; clicking one switches the panel on the right.

    -This environment page has five parts. It is a useful place to check whether your properties have -been set correctly. -The first part 'Runtime Information' simply contains the [runtime properties](configuration.html#runtime-environment) -like versions of Java and Scala. -The second part 'Spark Properties' lists the [application properties](configuration.html#application-properties) like -['spark.app.name'](configuration.html#application-properties) and 'spark.driver.memory'. - -

    - Hadoop Properties - -

    -Clicking the 'Hadoop Properties' link displays properties relative to Hadoop and YARN. Note that properties like -['spark.hadoop.*'](configuration.html#execution-behavior) are shown not in this part but in 'Spark Properties'. - -

    - System Properties - -

    -'System Properties' shows more details about the JVM. - -

    - Classpath Entries - -

    - -The last part 'Classpath Entries' lists the classes loaded from different sources, which is very useful -to resolve class conflicts. +The sub-tabs are: + +* **Runtime Information** — JVM, Scala, and other + [runtime properties](configuration.html#runtime-environment) of the driver. +* **Spark Properties** — the effective + [application properties](configuration.html#application-properties) + (such as `spark.app.name` and `spark.driver.memory`). Note that + [`spark.hadoop.*`](configuration.html#execution-behavior) properties are + listed here, not under Hadoop Properties. +* **Resource Profiles** — CPU, memory, and accelerator resource + requests for each [resource profile](configuration.html#stage-level-scheduling-overview) + in use. +* **Hadoop Properties** — values loaded from Hadoop and YARN configuration + files. +* **System Properties** — the underlying JVM system properties. +* **Metrics Properties** — the configuration loaded for the + [metrics system](monitoring.html#metrics). +* **Classpath Entries** — the classes loaded into the driver, broken + down by source. This is handy when tracking down class conflicts. ## Executors Tab -The Executors tab displays summary information about the executors that were created for the -application, including memory and disk usage and task and shuffle information. The Storage Memory -column shows the amount of memory used and reserved for caching data. +The Executors tab lists every executor that has been allocated to the +application, including the driver. Each row shows resource usage (memory, +disk, cores), storage memory reserved for cached data, task counts, shuffle +totals, and performance signals such as +[GC time](tuning.html#garbage-collection-tuning).

    -The Executors tab provides not only resource information (amount of memory, disk, and cores used by each executor) -but also performance information ([GC time](tuning.html#garbage-collection-tuning) and shuffle information). - -

    - Stderr Log - -

    - -Clicking the 'stderr' link of executor 0 displays detailed [standard error log](spark-standalone.html#monitoring-and-logging) -in its console. - -

    - Thread Dump - -

    - -Clicking the 'Thread Dump' link of executor 0 displays the thread dump of JVM on executor 0, which is pretty useful -for performance analysis. +Each row carries a set of detail links — **Thread Dump**, **Heap +Histogram**, and **Flame Graph** — that open the corresponding live +data for that executor in a side panel without leaving the page. The panel +can be resized by dragging its left edge. The **stderr** and **stdout** +links open the executor's log files in a new view; the exact location of +those logs depends on your cluster manager (see +[Monitoring and Instrumentation](monitoring.html) for details). ## SQL Tab -If the application executes Spark SQL queries, the SQL tab displays information, such as the duration, -jobs, and physical and logical plans for the queries. Here we include a basic example to illustrate -this tab: -{% highlight scala %} -scala> val df = Seq((1, "andy"), (2, "bob"), (2, "andy")).toDF("count", "name") -df: org.apache.spark.sql.DataFrame = [count: int, name: string] -scala> df.count -res0: Long = 3 +### Query Listing -scala> df.createGlobalTempView("df") +The SQL tab lists all SQL and DataFrame queries submitted to the Spark +application. Any DataFrame action that triggers execution (such as `count`, +`show`, or `write`) shows up here, not only queries written as SQL strings. +Here is a short example that produces a few entries: -scala> spark.sql("select name,sum(count) from global_temp.df group by name").show -+----+----------+ -|name|sum(count)| -+----+----------+ -|andy| 3| -| bob| 2| -+----+----------+ +{% highlight python %} +df = spark.createDataFrame([(1, "andy"), (2, "bob"), (2, "andy")], ["count", "name"]) +df.count() +df.createOrReplaceTempView("df") +spark.sql("SELECT name, SUM(count) FROM df GROUP BY name").show() {% endhighlight %}

    @@ -340,44 +282,47 @@ scala> spark.sql("select name,sum(count) from global_temp.df group by name").sho

    -Now the above three dataframe/SQL operators are shown in the list. If we click the -'show at \: 24' link of the last query, we will see the DAG and details of the query execution. - -

    - SQL DAG - -

    +The listing supports sorting by column, searching, filtering by status, +and pagination, which makes it easy to locate a specific query in +long-running applications. -The query details page displays information about the query execution time, its duration, -the list of associated jobs, and the query execution DAG. -The first block 'WholeStageCodegen (1)' compiles multiple operators ('LocalTableScan' and 'HashAggregate') together into a single Java -function to improve performance, and metrics like number of rows and spill size are listed in the block. -The annotation '(1)' in the block name is the code generation id. -The second block 'Exchange' shows the metrics on the shuffle exchange, including -number of written shuffle records, total data size, etc. +### SQL Plan Visualization +Each query in the listing has a graph view of its operators. Every node +shows the operator name together with its metrics inline, and the edges +follow the data flow. You can pan and zoom the graph to navigate large +plans, search for a node by name, and click any node to open a side panel +with its full details.

    - logical plans and the physical plan

    -Clicking the 'Details' link on the bottom displays the logical plans and the physical plan, which -illustrate how Spark parses, analyzes, optimizes and performs the query. -Steps in the physical plan subject to whole stage code generation optimization, are prefixed by a star followed by -the code generation id, for example: '*(1) LocalTableScan' + +### Execution Detail Page + +The execution detail page, opened by clicking the **ID** or **Description** +link of any row in the query listing, gathers everything recorded for a +single query. The header lists the +query's submission time, duration, status, description, and the jobs and +stages associated with it. The +[SQL Plan Visualization](#sql-plan-visualization) shows the graph of +operators. At the bottom of the page, a "Details" link expands the full +text of the parsed, analyzed, and optimized logical plans together with +the physical plan, useful when you want to see how Spark transformed your +query during planning. ### SQL metrics -The metrics of SQL operators are shown in the block of physical operators. The SQL metrics can be useful -when we want to dive into the execution details of each operator. For example, "number of output rows" -can answer how many rows are output after a Filter operator, "shuffle bytes written total" in an Exchange -operator shows the number of bytes written by a shuffle. +Each node in the [SQL Plan Visualization](#sql-plan-visualization) carries +its own metrics inline. These metrics are useful when you want to dive into +the execution details of each operator. For example, `number of output rows` +shows how many rows pass through a `Filter` operator, and +`shuffle bytes written` in an `Exchange` shows how much data the +shuffle wrote. 
Here is the list of SQL metrics: diff --git a/examples/pom.xml b/examples/pom.xml index 30a728cf8abad..7edcc47c2a2ff 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 7507186480adf..0d7048731b297 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index e64023e3b3e36..a308783f8a120 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala b/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala index 44a521bd636c5..ae4f5660facab 100644 --- a/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala +++ b/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala @@ -115,6 +115,17 @@ class PathOutputCommitProtocol( // failures. Warn logTrace(s"Committer $committer may not be tolerant of task commit failures") } + + if (dynamicPartitionOverwrite) { + // FileOutputCommitter must be initialized with the staging directory so that task output + // lands under stagingDir/_temporary/... and commitJob can later delete the old partition + // directories and move staged files to final dest. Without this, the committer writes + // directly to the final path and the dynamic-overwrite cleanup in commitJob never sees any + // partitionPaths. 
+ val ctor = + committer.getClass.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) + committer = ctor.newInstance(stagingDir, context) + } } else { // if required other committers need to be checked for dynamic partition // compatibility through a StreamCapabilities probe. @@ -161,6 +172,11 @@ class PathOutputCommitProtocol( }.getOrElse(workDir) val file = new Path(parent, getFilename(taskContext, spec)) logTrace(s"Creating task file $file for dir $dir and spec $spec") + if (dynamicPartitionOverwrite && committer.isInstanceOf[FileOutputCommitter]) { + assert(dir.isDefined, + "The dataset to be written must be partitioned when dynamicPartitionOverwrite is true.") + partitionPaths += dir.get + } file.toString } diff --git a/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala b/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala index ee89adab94fb9..954d9011f3bbe 100644 --- a/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala +++ b/hadoop-cloud/src/test/scala/org/apache/spark/internal/io/cloud/CommitterBindingSuite.scala @@ -30,6 +30,19 @@ import org.apache.spark.internal.io.{FileCommitProtocol, FileNameSpec} import org.apache.spark.internal.io.cloud.PathOutputCommitProtocol.{CAPABILITY_DYNAMIC_PARTITIONING, OUTPUTCOMMITTER_FACTORY_SCHEME} import org.apache.spark.network.util.JavaUtils +/** + * Subclass that exposes the protected `partitionPaths` field so tests can + * assert on it without going through the full `commitTask` path (which + * requires `SparkEnv`). 
+ */ +private class PathOutputCommitProtocolForTest( + jobId: String, + dest: String, + dynamicPartitionOverwrite: Boolean) + extends PathOutputCommitProtocol(jobId, dest, dynamicPartitionOverwrite) { + def capturedPartitionPaths: Set[String] = partitionPaths.toSet +} + class CommitterBindingSuite extends SparkFunSuite { private val jobId = "2007071202143_0101" @@ -264,5 +277,107 @@ class CommitterBindingSuite extends SparkFunSuite { "org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory") } -} + /** + * With dynamicPartitionOverwrite=true and a FileOutputCommitter, newTaskTempFile must route + * output through the staging directory (not the final output path) and must record the partition + * in partitionPaths so that commitJob can delete the old partition directory and rename the + * staged one into place. + */ + test("SPARK-56588: FileOutputCommitter dynamic partition overwrite stages output and tracks " + + "partitions") { + val jobCommitDir = File.createTempFile("dyn-part-overwrite-staging", "") + try { + jobCommitDir.delete() + val jobUri = jobCommitDir.toURI + val path = new Path(jobUri) + val job = newJob(path) + val conf = job.getConfiguration + conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttempt0) + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1) + bindToFileOutputCommitterFactory(conf, "file") + val tContext = new TaskAttemptContextImpl(conf, taskAttemptId0) + val committer = new PathOutputCommitProtocolForTest(jobId, jobUri.toString, true) + committer.setupJob(tContext) + committer.setupTask(tContext) + + val spec = FileNameSpec("", ".parquet") + val partition = "a=1/b=2" + val tempPath = committer.newTaskTempFile(tContext, Some(partition), spec) + + // The temp file must be under the staging directory, not the final output path. 
+ assert(tempPath.contains(".spark-staging-"), + s"Expected temp path under staging dir, got: $tempPath") + assert(!tempPath.startsWith(path.toUri.toString.stripSuffix("/") + "/" + partition), + s"Temp path must not point directly to the final output location: $tempPath") + + // The partition must have been recorded so commitJob can overwrite it. + assert(committer.capturedPartitionPaths === Set(partition), + s"Expected partitionPaths = {$partition}, got: ${committer.capturedPartitionPaths}") + } finally { + jobCommitDir.delete() + } + } + + /** + * A cloud committer that handles dynamic partitioning natively (via StreamCapabilities) must NOT + * have its partitions tracked in Spark's partitionPaths set: the committer takes care of + * overwriting itself, and the commitJob rename loop must not interfere. + */ + test("SPARK-56588: Cloud committer with dynamic partition support does not track partitions in " + + "partitionPaths") { + val path = new Path("http://example/data") + val job = newJob(path) + val conf = job.getConfiguration + conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttempt0) + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1) + StubPathOutputCommitterBinding.bindWithDynamicPartitioning(conf, "http") + val tContext = new TaskAttemptContextImpl(conf, taskAttemptId0) + val committer = new PathOutputCommitProtocolForTest(jobId, path.toUri.toString, true) + committer.setupJob(tContext) + committer.setupTask(tContext) + + val tempPath = committer.newTaskTempFile(tContext, Some("a=1"), FileNameSpec("", ".parquet")) + + // The temp file must be under the committer's own work dir (path/_temporary), + // not written directly to the final output location. 
+ val expectedWorkDir = path.toUri.toString.stripSuffix("/") + "/_temporary" + assert(tempPath.startsWith(expectedWorkDir), + s"Expected temp path under committer work dir ($expectedWorkDir), got: $tempPath") + assert(committer.capturedPartitionPaths.isEmpty, + s"partitionPaths must stay empty for cloud committers that handle " + + s"dynamic partition overwrite natively, " + + s"got: ${committer.capturedPartitionPaths}") + } + + /** + * Without dynamicPartitionOverwrite, partitionPaths must remain empty even for + * FileOutputCommitter (baseline: existing behaviour must not regress). + */ + test("SPARK-56588: FileOutputCommitter without dynamicPartitionOverwrite does not track " + + "partitions") { + val jobCommitDir = File.createTempFile("no-dyn-part-overwrite", "") + try { + jobCommitDir.delete() + val jobUri = jobCommitDir.toURI + val path = new Path(jobUri) + val job = newJob(path) + val conf = job.getConfiguration + conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttempt0) + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1) + bindToFileOutputCommitterFactory(conf, "file") + val tContext = new TaskAttemptContextImpl(conf, taskAttemptId0) + val committer = new PathOutputCommitProtocolForTest(jobId, jobUri.toString, false) + committer.setupJob(tContext) + committer.setupTask(tContext) + + committer.newTaskTempFile(tContext, Some("a=1"), FileNameSpec("", ".parquet")) + + assert(committer.capturedPartitionPaths.isEmpty, + s"partitionPaths must be empty when dynamicPartitionOverwrite=false, " + + s"got: ${committer.capturedPartitionPaths}") + } finally { + jobCommitDir.delete() + } + } +} diff --git a/launcher/pom.xml b/launcher/pom.xml index 811bdd47d1873..a3443e4478391 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java 
b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java index ec3c030723ce3..feab843804228 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java +++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java @@ -46,7 +46,7 @@ public class JavaModuleOptions { "-Dio.netty.tryReflectionSetAccessible=true", "-Dio.netty.allocator.type=pooled", "-Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE", - "-Dio.netty.noUnsafe=false", + "--sun-misc-unsafe-memory-access=allow", "--enable-native-access=ALL-UNNAMED"}; /** diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9c7f51eb0885f..f4302f99265be 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 80d2866c59e12..e2f4aff537abe 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 ../pom.xml diff --git a/pom.xml b/pom.xml index c5220a470f722..0c0f250073b8a 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.13 - 4.2.0.1-4.3.0-0 + 4.2.0.1-4.3.0-1 pom Spark Project Parent POM https://spark.apache.org/ @@ -217,8 +217,12 @@ 1.11.0 1.84 1.20.0 + 6.2.0 - 4.2.12.Final + 4.2.13.Final 2.0.76.Final 78.3 6.0.3 @@ -338,8 +342,9 @@ -Dio.netty.tryReflectionSetAccessible=true -Dio.netty.allocator.type=pooled -Dio.netty.handler.ssl.defaultEndpointVerificationAlgorithm=NONE - -Dio.netty.noUnsafe=false + --sun-misc-unsafe-memory-access=allow --enable-native-access=ALL-UNNAMED + -XX:+EnableDynamicAgentLoading 3.5.7 9.6.0 @@ -3274,6 +3279,11 @@ org.apache.maven.plugins maven-source-plugin + org.scalastyle scalastyle-maven-plugin @@ -3290,13 +3300,6 @@ ${project.build.sourceEncoding} ${project.reporting.outputEncoding} - - - - check - - - org.apache.maven.plugins @@ -3434,6 +3437,32 @@ + + + 
scalastyle + + + + + org.scalastyle + scalastyle-maven-plugin + + + + check + + + + + + + + +