arenadata · giggsoff · Jun 15, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -30,13 +30,10 @@ jobs:
         include:
           - name: "core / utils / tags"
             slug: "core-utils-tags"
-            modules: ":spark-core_2.13,:spark-launcher_2.13,:spark-network-common_2.13,:spark-network-shuffle_2.13,:spark-network-yarn_2.13,:spark-unsafe_2.13,:spark-kvstore_2.13,:spark-tags_2.13,:spark-sketch_2.13,:spark-common-utils_2.13"
-          - name: "graphx / examples / repl"
-            slug: "graphx-examples-repl"
-            modules: ":spark-graphx_2.13,:spark-examples_2.13,:spark-repl_2.13"
-          - name: "catalyst / sql-api / hive-thriftserver"
-            slug: "catalyst-sql-api-hive-thriftserver"
-            modules: ":spark-sql-api_2.13,:spark-catalyst_2.13,:spark-hive-thriftserver_2.13"
+            modules: ":spark-core_2.13,:spark-launcher_2.13,:spark-network-common_2.13,:spark-network-shuffle_2.13,:spark-network-yarn_2.13,:spark-unsafe_2.13,:spark-kvstore_2.13,:spark-tags_2.13,:spark-sketch_2.13,:spark-common-utils_2.13,:spark-common-utils-java_2.13,:spark-udf-worker-core_2.13"
+          - name: "catalyst / sql-api / hive-thriftserver / pipelines / graphx / examples / repl"
+            slug: "catalyst-graphx"
+            modules: ":spark-sql-api_2.13,:spark-catalyst_2.13,:spark-hive-thriftserver_2.13,:spark-pipelines_2.13,:spark-graphx_2.13,:spark-examples_2.13,:spark-repl_2.13"
           - name: "sql - extended tests"
             slug: "sql"
             modules: ":spark-sql_2.13"
@@ -52,10 +49,19 @@ jobs:
           - name: "hive"
             slug: "hive"
             modules: ":spark-hive_2.13"
-          - name: "streaming / mllib / yarn / k8s / connect / protobuf / kafka / avro"
-            slug: "streaming-mllib-yarn-k8s-connect-protobuf-kafka-avro"
-            modules: ":spark-streaming_2.13,:spark-sql-kafka-0-10_2.13,:spark-streaming-kafka-0-10_2.13,:spark-token-provider-kafka-0-10_2.13,:spark-mllib-local_2.13,:spark-mllib_2.13,:spark-yarn_2.13,:spark-kubernetes_2.13,:spark-hadoop-cloud_2.13,:spark-connect_2.13,:spark-connect-common_2.13,:spark-connect-client-jvm_2.13,:spark-protobuf_2.13,:spark-avro_2.13,:spark-assembly_2.13"
+          - name: "mllib"
+            slug: "mllib"
+            modules: ":spark-mllib-local_2.13,:spark-mllib_2.13"
+          - name: "connect / protobuf"
+            slug: "connect-protobuf"
+            modules: ":spark-connect_2.13,:spark-connect-common_2.13,:spark-connect-client-jvm_2.13,:spark-connect-client-jdbc_2.13,:spark-protobuf_2.13"
             extra: -Dtest.exclude.tags=org.apache.spark.tags.AmmoniteTest
+          - name: "streaming / kafka / avro"
+            slug: "streaming-kafka-avro"
+            modules: ":spark-streaming_2.13,:spark-sql-kafka-0-10_2.13,:spark-streaming-kafka-0-10_2.13,:spark-token-provider-kafka-0-10_2.13,:spark-avro_2.13"
+          - name: "yarn / k8s / hadoop-cloud / assembly"
+            slug: "yarn-k8s-hadoop-cloud-assembly"
+            modules: ":spark-yarn_2.13,:spark-kubernetes_2.13,:spark-hadoop-cloud_2.13,:spark-assembly_2.13"
     steps:
       - uses: actions/checkout@v6
 
@@ -77,8 +83,8 @@ jobs:
         run: |
           python3 -m pip install --upgrade pip
           python3 -m pip install 'numpy>=1.20.0' 'pyarrow' 'pandas' 'scipy' \
-            'unittest-xml-reporting' 'grpcio==1.56.0' 'protobuf==4.25.3' \
-            'grpcio-status==1.56.0' 'googleapis-common-protos==1.56.4' \
+            'unittest-xml-reporting' 'grpcio==1.76.0' 'protobuf==6.33.5' \
+            'grpcio-status==1.76.0' 'googleapis-common-protos==1.71.0' \
             'zstandard==0.25.0'
 
       - name: Build dependent modules (compile main+tests, install incl. test-jars)
@@ -149,23 +155,19 @@ jobs:
       matrix:
         include:
           - name: sql
-            modules: pyspark-sql,pyspark-resource,pyspark-testing
-          - name: core
-            modules: pyspark-core,pyspark-streaming
+            modules: pyspark-sql,pyspark-resource,pyspark-testing,pyspark-core,pyspark-errors,pyspark-logger
           - name: ml
-            modules: pyspark-mllib,pyspark-ml
+            modules: pyspark-mllib,pyspark-ml,pyspark-ml-connect,pyspark-pipelines
+          - name: streaming
+            modules: pyspark-streaming,pyspark-structured-streaming,pyspark-structured-streaming-connect
+          - name: connect
+            modules: pyspark-connect
           - name: pandas
             modules: pyspark-pandas
           - name: pandas-slow
             modules: pyspark-pandas-slow
-          - name: connect
-            modules: pyspark-connect
-          - name: pandas-connect
-            modules: pyspark-pandas-connect
-          - name: pandas-slow-connect
-            modules: pyspark-pandas-slow-connect
-          - name: errors
-            modules: pyspark-errors
+          - name: pandas-connect-and-slow
+            modules: pyspark-pandas-connect,pyspark-pandas-slow-connect
     env:
       MODULES_TO_TEST: ${{ matrix.modules }}
       PYTHON_TO_TEST: python3.10
@@ -192,11 +194,12 @@ jobs:
             'numpy==1.26.4' 'pyarrow==18.0.0' 'pandas==2.2.0' 'scipy' \
             'unittest-xml-reporting' 'coverage' \
             'memory-profiler' 'plotly<6' 'matplotlib' \
-            'grpcio==1.56.0' 'grpcio-status==1.56.0' \
-            'protobuf==4.25.3' 'googleapis-common-protos==1.56.4' \
+            'grpcio==1.76.0' 'grpcio-status==1.76.0' \
+            'protobuf==6.33.5' 'googleapis-common-protos==1.71.0' \
             'graphviz>=0.20' 'openpyxl' \
             'scikit-learn==1.1.*' 'mlflow==3.12.0' \
-            'torch==2.0.1' 'torchvision==0.15.2' 'torcheval'
+            'torch==2.5.1' 'torchvision==0.20.1' 'torcheval' \
+            'zstandard==0.25.0'
 
       - name: Build Spark (full reactor including assembly)
         env:

diff --git a/AGENTS.md b/AGENTS.md
@@ -22,7 +22,7 @@ Avoid introducing non-ASCII characters in code or comments. String literals may
 
 ## Build and Test
 
-Build and tests can take a long time. Before running tests, ask the user if they have more changes to make.
+Build and tests can take a long time. If the user explicitly asked you to run tests, run them. Otherwise (that is, when you are running tests on your own to verify a change), first ask the user if they have more changes to make.
 
 Prefer SBT over Maven for faster incremental compilation. Module names are defined in `project/SparkBuild.scala`.
 
@@ -128,3 +128,9 @@ DO NOT push to the upstream repo. Always push to the personal fork. Open PRs aga
 DO NOT force push or use `--amend` on pushed commits unless the user explicitly asks. If the remote branch has new commits, fetch and rebase before pushing.
 
 Always get user approval before external operations such as pushing commits, creating PRs, or posting comments. Use `gh pr create` to open PRs. If `gh` is not installed, generate the GitHub PR URL for the user and recommend installing the GitHub CLI.
+
+## Security
+
+Security model: [SECURITY.md](./SECURITY.md)
+
+Agents that scan this repository should consult `SECURITY.md`, and the security documentation it links to, for the project's threat model, in-scope / out-of-scope declarations, and known non-findings before reporting issues.
diff --git a/R/.gitignore b/R/.gitignore
@@ -6,3 +6,7 @@ pkg/man
 pkg/html
 SparkR.Rcheck/
 SparkR_*.tar.gz
+# Transient copies made by dev/make-distribution.sh while building the package.
+pkg/LICENSE
+pkg/NOTICE
+DESCRIPTION.orig
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 4.2.0.1-4.3.0-0
+Version: 4.2.0.1-4.3.0-1
 Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
 Authors@R:

diff --git a/README.md b/README.md
@@ -52,7 +52,6 @@ This README file only contains basic setup instructions.
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml)                           |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14_nogil.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14_nogil.yml)               |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml)                     |
-|            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml)               |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect40.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect40.yml)                 |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect.yml)                     |
 |            | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_sparkr_window.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_sparkr_window.yml)                       |

diff --git a/SECURITY.md b/SECURITY.md
@@ -0,0 +1,13 @@
+# Security policy
+
+## Reporting a vulnerability
+
+To report a security vulnerability in Apache Spark, follow the ASF security process at <https://www.apache.org/security/>. Reports can be sent privately to security@apache.org or to private@spark.apache.org.
+
+## Threat model
+
+Apache Spark's security guidance for operators — covering authentication, encryption, access control, Kerberos integration, trust boundaries, environmental assumptions, security properties provided and not provided, and downstream operator responsibilities — is documented at:
+
+<https://spark.apache.org/docs/latest/security.html>
+
+Automated security scanning agents should consult that document for the project's in-scope / out-of-scope declarations before reporting issues.
diff --git a/assembly/pom.xml b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
@@ -23,6 +23,7 @@
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
 
 public class JavaUtilsSuite {
 
@@ -52,6 +53,10 @@ public void testCreateDirectory() throws IOException {
     // 4. The parent directory cannot write
     assertTrue(testDir.canWrite());
     assertTrue(testDir.setWritable(false));
+    // Skip when setWritable(false) has no effect (e.g. running as root,
+    // or on a filesystem that ignores POSIX write bits).
+    assumeFalse(testDir.canWrite(),
+      "setWritable(false) had no effect; skipping write-denied scenario");
     assertThrows(IOException.class,
       () -> JavaUtils.createDirectory(testDirPath, "scenario4"));
     assertTrue(testDir.setWritable(true));

diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/tags/pom.xml b/common/tags/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.13</artifactId>
-    <version>4.2.0.1-4.3.0-0</version>
+    <version>4.2.0.1-4.3.0-1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -110,7 +110,7 @@ public record CollationMeta(
   public static class Collation {
     public final String collationName;
     public final String provider;
-    private final Collator collator;
+    private final ThreadLocal<Collator> threadLocalCollator;
     public final Comparator<UTF8String> comparator;
 
     /**
@@ -187,7 +187,7 @@ public static class Collation {
     public Collation(
         String collationName,
         String provider,
-        Collator collator,
+        ThreadLocal<Collator> threadLocalCollator,
         Comparator<UTF8String> comparator,
         String version,
         Function<UTF8String, byte[]> sortKeyFunction,
@@ -197,7 +197,7 @@ public Collation(
         boolean supportsSpaceTrimming) {
       this.collationName = collationName;
       this.provider = provider;
-      this.collator = collator;
+      this.threadLocalCollator = threadLocalCollator;
       this.comparator = comparator;
       this.version = version;
       this.sortKeyFunction = sortKeyFunction;
@@ -216,7 +216,7 @@ public Collation(
     }
 
     public Collator getCollator() {
-      return collator;
+      return threadLocalCollator != null ? threadLocalCollator.get() : null;
     }
 
     /**
@@ -1016,29 +1016,40 @@ protected Collation buildCollation() {
           builder.setUnicodeLocaleKeyword("ks", "level1");
         }
         ULocale resultLocale = builder.build();
-        Collator collator = Collator.getInstance(resultLocale);
-        // Freeze ICU collator to ensure thread safety.
-        collator.freeze();
+
+        // Use thread-local Collator instances to avoid lock contention.
+        // A frozen RuleBasedCollator serializes all threads through a ReentrantLock on its
+        // internal collation buffer (used by getCollationKey/compare). By creating independent
+        // per-thread instances via Collator.getInstance(), each thread operates on its own
+        // buffer without locking. Each instance is frozen as a mutation guard so that any
+        // accidental call to setStrength() or similar throws immediately.
+        ThreadLocal<Collator> threadLocalCollator = ThreadLocal.withInitial(
+          () -> {
+            Collator collator = Collator.getInstance(resultLocale);
+            collator.freeze();
+            return collator;
+          });
 
         Comparator<UTF8String> comparator;
         Function<UTF8String, byte[]> sortKeyFunction;
 
         if (spaceTrimming == SpaceTrimming.NONE) {
           comparator = (s1, s2) ->
-            collator.compare(s1.toValidString(), s2.toValidString());
-          sortKeyFunction = s -> collator.getCollationKey(s.toValidString()).toByteArray();
+            threadLocalCollator.get().compare(s1.toValidString(), s2.toValidString());
+          sortKeyFunction = s ->
+            threadLocalCollator.get().getCollationKey(s.toValidString()).toByteArray();
         } else {
-          comparator = (s1, s2) -> collator.compare(
+          comparator = (s1, s2) -> threadLocalCollator.get().compare(
             applyTrimmingPolicy(s1, spaceTrimming).toValidString(),
             applyTrimmingPolicy(s2, spaceTrimming).toValidString());
-          sortKeyFunction = s -> collator.getCollationKey(
+          sortKeyFunction = s -> threadLocalCollator.get().getCollationKey(
             applyTrimmingPolicy(s, spaceTrimming).toValidString()).toByteArray();
         }
 
         return new Collation(
           normalizedCollationName(),
           PROVIDER_ICU,
-          collator,
+          threadLocalCollator,
           comparator,
           ICU_VERSION,
           sortKeyFunction,