From a04e261a5a404d7ce11ad20295ae1c13ef6af5af Mon Sep 17 00:00:00 2001
From: Avinash <avistylein3105@gmail.com>
Date: Tue, 5 May 2026 17:30:17 +0000
Subject: [PATCH 1/5] 2026-05-05T17:30:17Z

Staging branch manifest for N/A
---
 .staging-manifest.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .staging-manifest.yaml

diff --git a/.staging-manifest.yaml b/.staging-manifest.yaml
new file mode 100644
index 00000000..c35832c4
--- /dev/null
+++ b/.staging-manifest.yaml
@@ -0,0 +1,32 @@
+base:
+  repository: "facebookincubator/velox"
+  commit: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+
+additional_merge:
+  null  # No additional repository merged
+
+merged_prs:
+  - number: 16423
+    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    author: "alice"
+    title: "feat #16423"
+    url: "https://github.com/facebookincubator/velox/pull/16423"
+  - number: 16444
+    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    author: "alice"
+    title: "feat #16444"
+    url: "https://github.com/facebookincubator/velox/pull/16444"
+
+skipped_prs:  # PRs that required manual conflict resolution and were not merged
+  - number: 16535
+    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    author: "alice"
+    title: "feat #16535"
+    url: "https://github.com/facebookincubator/velox/pull/16535"
+    reason: "manual conflict resolution required"
+  - number: 16859
+    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    author: "alice"
+    title: "feat #16859"
+    url: "https://github.com/facebookincubator/velox/pull/16859"
+    reason: "manual conflict resolution required"

From a1d61cba0a15a0088907ac0c467011aa4bdeae40 Mon Sep 17 00:00:00 2001
From: Avinash <avistylein3105@gmail.com>
Date: Mon, 18 May 2026 14:15:17 +0000
Subject: [PATCH 2/5] Fix tpcds integration tests: handle nulls in ORDER BY
 validation

---
 common/testing/result_comparison.py      | 45 ++++++++++++++++++++--
 common/testing/result_comparison_test.py | 49 ++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/common/testing/result_comparison.py b/common/testing/result_comparison.py
index 53b180c4..dca5a203 100644
--- a/common/testing/result_comparison.py
+++ b/common/testing/result_comparison.py
@@ -212,6 +212,13 @@ def _validate_orderby(df: pd.DataFrame, sort_col_indices: list[int], ascending:
     throughout — every value comes from one engine and is bit-identical to
     itself.
 
+    Null handling: SQL leaves the position of NULLs in ORDER BY engine-
+    defined (Presto/Velox default to NULLS LAST for ASC, NULLS FIRST for
+    DESC, others differ), so adjacent pairs where either side is NULL are
+    not counted as violations. Two NULLs are treated as tied (do not break
+    the outer tie group); a NULL adjacent to a non-NULL value is treated as
+    a change for tie-group tracking only.
+
     Raises AssertionError pointing at the first offending row. A no-op when
     sort_col_indices is empty or the frame has 0-1 rows.
     """
@@ -223,9 +230,41 @@ def _validate_orderby(df: pd.DataFrame, sort_col_indices: list[int], ascending:
     prior_changed = np.zeros(len(df) - 1, dtype=bool)
 
     for col_idx, asc in zip(sort_col_indices, ascending):
-        col = df.iloc[:, col_idx].to_numpy()
+        col_series = df.iloc[:, col_idx]
+        # Float columns route nulls through NaN so numpy's well-defined
+        # float comparisons (NaN < NaN -> False) keep them out of the
+        # anti-monotonic mask. Object/string/date columns can't be compared
+        # element-wise against NaN/None without raising, so we mask null-
+        # involving pairs out below.
+        if _is_float_col(col_series):
+            col = col_series.to_numpy(dtype=np.float64, na_value=np.nan)
+        else:
+            col = col_series.to_numpy()
         prev, curr = col[:-1], col[1:]
-        anti_monotonic = curr < prev if asc else curr > prev
+
+        is_null = pd.isna(col_series).to_numpy()
+        prev_null, curr_null = is_null[:-1], is_null[1:]
+        either_null = prev_null | curr_null
+
+        if either_null.any() and not _is_float_col(col_series):
+            # Object-dtype path: comparing None/NaN with anything raises
+            # TypeError. Restrict the inequality check to fully non-null
+            # pairs, and treat null<->non-null transitions as a value
+            # change (so we still break out of tie groups across them)
+            # while null<->null stays tied.
+            safe = ~either_null
+            anti_monotonic = np.zeros(len(prev), dtype=bool)
+            value_changed = np.zeros(len(prev), dtype=bool)
+            if safe.any():
+                ps, cs = prev[safe], curr[safe]
+                anti_monotonic[safe] = (cs < ps) if asc else (cs > ps)
+                value_changed[safe] = cs != ps
+            value_changed |= prev_null ^ curr_null
+        else:
+            with np.errstate(invalid="ignore"):
+                anti_monotonic = (curr < prev) if asc else (curr > prev)
+            value_changed = curr != prev
+
         # A real violation requires both anti-monotonicity AND prior columns
         # to be tied; otherwise we're across an outer-tie boundary, where
         # this column's direction doesn't apply.
@@ -237,7 +276,7 @@ def _validate_orderby(df: pd.DataFrame, sort_col_indices: list[int], ascending:
                 f"({'ASC' if asc else 'DESC'}) at row {i + 1}: "
                 f"got {curr[i]!r} after {prev[i]!r}"
             )
-        prior_changed = prior_changed | (curr != prev)
+        prior_changed = prior_changed | value_changed
 
 
 def _canonical_sort(df: pd.DataFrame) -> pd.DataFrame:
diff --git a/common/testing/result_comparison_test.py b/common/testing/result_comparison_test.py
index d8dc94fe..eb5154ac 100644
--- a/common/testing/result_comparison_test.py
+++ b/common/testing/result_comparison_test.py
@@ -103,6 +103,55 @@ def test_validate_orderby_handles_duplicate_column_labels():
     _validate_orderby(df, sort_col_indices=[0], ascending=[True])
 
 
+# ---------------------------------------------------------------------------
+# _validate_orderby null handling
+# ---------------------------------------------------------------------------
+
+
+def test_validate_orderby_object_column_with_nulls_does_not_raise():
+    # Regression: mixed object column with str and NaN used to crash with
+    # `TypeError: '<' not supported between instances of 'float' and 'str'`.
+    df = pd.DataFrame({0: ["a", "b", np.nan, "c"]})
+    _validate_orderby(df, sort_col_indices=[0], ascending=[True])
+
+
+def test_validate_orderby_object_column_all_none_does_not_raise():
+    # Regression: object column of all None used to crash with
+    # `TypeError: '<' not supported between instances of 'NoneType' and 'NoneType'`.
+    df = pd.DataFrame({0: [None, None, None]}, dtype=object)
+    _validate_orderby(df, sort_col_indices=[0], ascending=[True])
+
+
+def test_validate_orderby_object_column_null_then_value_does_not_raise():
+    # NULLS-FIRST style ordering (e.g. Presto DESC default): nulls then ASC
+    # of non-null values should pass.
+    df = pd.DataFrame({0: [None, None, "a", "b", "c"]}, dtype=object)
+    _validate_orderby(df, sort_col_indices=[0], ascending=[True])
+
+
+def test_validate_orderby_object_column_value_then_null_does_not_raise():
+    # NULLS-LAST style ordering (e.g. Presto ASC default): non-null ASC
+    # values then nulls at the tail should pass.
+    df = pd.DataFrame({0: ["a", "b", "c", None, None]}, dtype=object)
+    _validate_orderby(df, sort_col_indices=[0], ascending=[True])
+
+
+def test_validate_orderby_object_column_violation_between_non_nulls_still_raises():
+    # Null-masking only skips pairs that touch a null; real violations
+    # between two adjacent non-null values must still raise.
+    df = pd.DataFrame({0: ["b", "a", None, None]}, dtype=object)
+    with pytest.raises(AssertionError, match="ORDER BY"):
+        _validate_orderby(df, sort_col_indices=[0], ascending=[True])
+
+
+def test_validate_orderby_multi_column_nulls_in_primary_treated_as_tie_break():
+    # Primary col has nulls flanking a secondary-ordered block. Nulls should
+    # break the outer tie group (so the secondary can reset its ordering)
+    # rather than crash.
+    df = pd.DataFrame({0: [None, "x", "x", None], 1: [1, 1, 2, 1]})
+    _validate_orderby(df, sort_col_indices=[0, 1], ascending=[True, True])
+
+
 # ---------------------------------------------------------------------------
 # _canonical_sort
 # ---------------------------------------------------------------------------

From c07b81b2d4e05cd2305b1baf550ce4f7a663b45f Mon Sep 17 00:00:00 2001
From: Avinash <avistylein3105@gmail.com>
Date: Mon, 18 May 2026 14:23:44 +0000
Subject: [PATCH 3/5] test(result_comparison): keep object dtype across pandas
 3.0 string inference

---
 common/testing/result_comparison_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/common/testing/result_comparison_test.py b/common/testing/result_comparison_test.py
index eb5154ac..be5ec902 100644
--- a/common/testing/result_comparison_test.py
+++ b/common/testing/result_comparison_test.py
@@ -26,7 +26,7 @@ def test_normalize_to_expected_converts_str_dates_to_datetime64():
     When expected has datetime64 and actual has str dates,
     _normalize_to_expected converts actual to datetime64.
     """
-    actual = pd.DataFrame({0: ["1995-03-05", "1992-01-01", "1993-06-15"]})
+    actual = pd.DataFrame({0: pd.Series(["1995-03-05", "1992-01-01", "1993-06-15"], dtype=object)})
     expected = pd.DataFrame({0: pd.to_datetime(["1995-03-05", "1992-01-01", "1993-06-15"])})
 
     assert actual[0].dtype == object
@@ -42,8 +42,8 @@ def test_normalize_to_expected_handles_str_to_date_in_object_dtype():
     engines' parquet writers produce different DATE encodings).
     _normalize_to_expected converts actual's str values to datetime.date.
     """
-    actual = pd.DataFrame({0: ["1995-03-05", "1992-01-01"]})
-    expected = pd.DataFrame({0: [datetime.date(1995, 3, 5), datetime.date(1992, 1, 1)]})
+    actual = pd.DataFrame({0: pd.Series(["1995-03-05", "1992-01-01"], dtype=object)})
+    expected = pd.DataFrame({0: pd.Series([datetime.date(1995, 3, 5), datetime.date(1992, 1, 1)], dtype=object)})
 
     assert actual[0].dtype == object
     assert expected[0].dtype == object

From ffa2cf4fafe8909d999abdbed35f63c00812e1f4 Mon Sep 17 00:00:00 2001
From: Avinash <avistylein3105@gmail.com>
Date: Mon, 18 May 2026 14:30:01 +0000
Subject: [PATCH 4/5] removed unnecessary staging manifest file

---
 .staging-manifest.yaml | 32 --------------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 .staging-manifest.yaml

diff --git a/.staging-manifest.yaml b/.staging-manifest.yaml
deleted file mode 100644
index c35832c4..00000000
--- a/.staging-manifest.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-base:
-  repository: "facebookincubator/velox"
-  commit: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
-
-additional_merge:
-  null  # No additional repository merged
-
-merged_prs:
-  - number: 16423
-    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-    author: "alice"
-    title: "feat #16423"
-    url: "https://github.com/facebookincubator/velox/pull/16423"
-  - number: 16444
-    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-    author: "alice"
-    title: "feat #16444"
-    url: "https://github.com/facebookincubator/velox/pull/16444"
-
-skipped_prs:  # PRs that required manual conflict resolution and were not merged
-  - number: 16535
-    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-    author: "alice"
-    title: "feat #16535"
-    url: "https://github.com/facebookincubator/velox/pull/16535"
-    reason: "manual conflict resolution required"
-  - number: 16859
-    commit: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-    author: "alice"
-    title: "feat #16859"
-    url: "https://github.com/facebookincubator/velox/pull/16859"
-    reason: "manual conflict resolution required"

From c84d0916c9e8dd7a5c558d59901729776fd97ea5 Mon Sep 17 00:00:00 2001
From: Avinash <avistylein3105@gmail.com>
Date: Wed, 20 May 2026 14:36:11 +0000
Subject: [PATCH 5/5] removed unnecessary warn handler context

---
 common/testing/result_comparison.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/common/testing/result_comparison.py b/common/testing/result_comparison.py
index dca5a203..f4b90f3e 100644
--- a/common/testing/result_comparison.py
+++ b/common/testing/result_comparison.py
@@ -261,8 +261,7 @@ def _validate_orderby(df: pd.DataFrame, sort_col_indices: list[int], ascending:
                 value_changed[safe] = cs != ps
             value_changed |= prev_null ^ curr_null
         else:
-            with np.errstate(invalid="ignore"):
-                anti_monotonic = (curr < prev) if asc else (curr > prev)
+            anti_monotonic = (curr < prev) if asc else (curr > prev)
             value_changed = curr != prev
 
         # A real violation requires both anti-monotonicity AND prior columns