From 7fe8269ae8aacc0c5734a206d57b1c554cfb34ad Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:07:19 -0500 Subject: [PATCH 1/7] Refactor create to mirror getOrCreate --- python/pyspark/sql/session.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 5bdd663c62934..10abfba4854ed 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -610,12 +610,16 @@ def create(self) -> "SparkSession": from pyspark.core.context import SparkContext with self._lock: - # Build SparkConf from options - sparkConf = SparkConf() - for key, value in self._options.items(): - sparkConf.set(key, str(value)) - - sc = SparkContext.getOrCreate(sparkConf) + session = SparkSession._instantiatedSession + # Get SparkContext + if session is None or session._sc._jsc is None: + sparkConf = SparkConf() + for key, value in self._options.items(): + sparkConf.set(key, value) + # This SparkContext may be an existing one. + sc = SparkContext.getOrCreate(sparkConf) + else: + sc = session._sc jSparkSessionClass = SparkSession._get_j_spark_session_class(sc._jvm) # Create a new SparkSession in the JVM jSparkSession = jSparkSessionClass.builder().config(self._options).create() From 3355e94077eea669430061a5a480af65edfd6b93 Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Tue, 24 Feb 2026 10:35:30 -0500 Subject: [PATCH 2/7] Rename session variable to instantiated_session --- python/pyspark/sql/session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 10abfba4854ed..d5b0a147f6470 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -610,16 +610,16 @@ def create(self) -> "SparkSession": from pyspark.core.context import SparkContext with self._lock: - session = SparkSession._instantiatedSession + instantiated_session = SparkSession._instantiatedSession # Get SparkContext - if session is None or session._sc._jsc is None: + if instantiated_session is None or instantiated_session._sc._jsc is None: sparkConf = SparkConf() for key, value in self._options.items(): sparkConf.set(key, value) # This SparkContext may be an existing one. sc = SparkContext.getOrCreate(sparkConf) else: - sc = session._sc + sc = instantiated_session._sc jSparkSessionClass = SparkSession._get_j_spark_session_class(sc._jvm) # Create a new SparkSession in the JVM jSparkSession = jSparkSessionClass.builder().config(self._options).create() From eaf71350045d0cb43a14f379d906879b7d838816 Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Tue, 24 Feb 2026 10:37:46 -0500 Subject: [PATCH 3/7] Add test for existing Spark session behavior Add test to ensure SparkConf is not called when a session exists. --- python/pyspark/sql/tests/test_session.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index 3606056f6793d..b71ac97eece2e 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -616,6 +616,19 @@ def test_create_sessions_share_spark_context(self): finally: session2.stop() + def test_create_does_not_construct_spark_conf_when_session_exists(self): + """Ensure SparkConf() is not called when a valid session already exists.""" + self.session = self._get_builder().create() + with unittest.mock.patch( + "pyspark.sql.session.SparkConf" + ) as mock_spark_conf: + session2 = self._get_builder().create() + try: + mock_spark_conf.assert_not_called() + self.assertIs(session2.sparkContext, self.session.sparkContext) + finally: + session2.stop() + class SparkSessionProfileTests(unittest.TestCase, PySparkErrorTestUtils): def setUp(self): From 84c1a04a8f22afc6a46100896c1a4733d88fe0c5 Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:16:14 -0500 Subject: [PATCH 4/7] Review comment --- python/pyspark/sql/session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index d5b0a147f6470..25b58d08e4595 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -612,14 +612,14 @@ def create(self) -> "SparkSession": with self._lock: instantiated_session = SparkSession._instantiatedSession # Get SparkContext - if instantiated_session is None or instantiated_session._sc._jsc is None: + if instantiated_session is not None and instantiated_session._sc._jsc is not None: + sc = instantiated_session._sc + else: sparkConf = SparkConf() for key, value in self._options.items(): sparkConf.set(key, value) # This SparkContext may be an existing one. sc = SparkContext.getOrCreate(sparkConf) - else: - sc = instantiated_session._sc jSparkSessionClass = SparkSession._get_j_spark_session_class(sc._jvm) # Create a new SparkSession in the JVM jSparkSession = jSparkSessionClass.builder().config(self._options).create() From 1b3fb9e278a9f4ca281298896f95f6f468ae002b Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:59:24 -0500 Subject: [PATCH 5/7] lint --- python/pyspark/sql/tests/test_session.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index b71ac97eece2e..aee5e9e780d7e 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -619,9 +619,7 @@ def test_create_sessions_share_spark_context(self): def test_create_does_not_construct_spark_conf_when_session_exists(self): """Ensure SparkConf() is not called when a valid session already exists.""" self.session = self._get_builder().create() - with unittest.mock.patch( - "pyspark.sql.session.SparkConf" - ) as mock_spark_conf: + with unittest.mock.patch("pyspark.sql.session.SparkConf") as mock_spark_conf: session2 = self._get_builder().create() try: mock_spark_conf.assert_not_called() From d3ff311c88b4cdc38b6487e0b2c14875bb743e35 Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:00:13 -0500 Subject: [PATCH 6/7] lint --- python/pyspark/sql/session.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 25b58d08e4595..22bbb73134314 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -612,7 +612,10 @@ def create(self) -> "SparkSession": with self._lock: instantiated_session = SparkSession._instantiatedSession # Get SparkContext - if instantiated_session is not None and instantiated_session._sc._jsc is not None: + if ( + instantiated_session is not None + and instantiated_session._sc._jsc is not None + ): sc = instantiated_session._sc else: sparkConf = SparkConf() From 88ccddfef2494e1de6e83429e9b00764dce601ff Mon Sep 17 00:00:00 2001 From: Jon Mio <16511957+jonmio@users.noreply.github.com> Date: Fri, 27 Feb 2026 20:39:36 -0500 Subject: [PATCH 7/7] Add requested test Add test to verify mutable SQL configs are applied per session. --- python/pyspark/sql/tests/test_session.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index aee5e9e780d7e..fb86deb33a2da 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -627,6 +627,21 @@ def test_create_does_not_construct_spark_conf_when_session_exists(self): finally: session2.stop() + def test_create_applies_mutable_conf_to_second_session(self): + """ + Ensure that mutable SQL configs passed to create() are applied per-session + even when a valid SparkSession already exists. + """ + key = "spark.sql.shuffle.partitions" + self.session = self._get_builder().config(key, "5").create() + self.assertEqual(self.session.conf.get(key), "5") + session2 = self._get_builder().config(key, "7").create() + try: + self.assertEqual(session2.conf.get(key), "7") + self.assertIs(session2.sparkContext, self.session.sparkContext) + finally: + session2.stop() + class SparkSessionProfileTests(unittest.TestCase, PySparkErrorTestUtils): def setUp(self):