fjosw · fjosw · Feb 3, 2026 · Feb 3, 2026 · Copilot · Feb 3, 2026
@@ -44,7 +44,7 @@ jobs:
 
       - name: Run tests with -Werror
         if: matrix.python-version != '3.14'
-        run: pytest --cov=pyerrors -vv -Werror
+        run: pytest --cov=pyerrors -vv
-        run: pytest --cov=pyerrors -vv
+        run: python -Werror -m pytest --cov=pyerrors -vv
-        run: pytest --cov=pyerrors -vv
+        run: python -Werror -m pytest --cov=pyerrors -vv
 
       - name: Run tests without -Werror for python 3.14
         if: matrix.python-version == '3.14'

@@ -145,9 +145,9 @@ def _serialize_df(df, gz=False):
         serialize = _need_to_serialize(out[column])
 
         if serialize is True:
-            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if x is not None else None)
+            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if not _is_null(x) else None)
             if gz is True:
-                out[column] = out[column].transform(lambda x: gzip.compress((x if x is not None else '').encode('utf-8')))
+                out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')) if not _is_null(x) else gzip.compress(b''))
     return out
 
 
@@ -166,37 +166,47 @@ def _deserialize_df(df, auto_gamma=False):
     ------
     In case any column of the DataFrame is gzipped it is gunzipped in the process.
     """
-    for column in df.select_dtypes(include="object"):
-        if isinstance(df[column][0], bytes):
-            if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
-
-        if not all([e is None for e in df[column]]):
+    # In pandas 3+, string columns use 'str' dtype instead of 'object'
+    string_like_dtypes = ["object", "str"] if int(pd.__version__.split(".")[0]) >= 3 else ["object"]
+    for column in df.select_dtypes(include=string_like_dtypes):
+        if isinstance(df[column].iloc[0], bytes):
+            if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
+                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '')
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '')
+                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '')
-        if isinstance(df[column].iloc[0], bytes):
-            if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '')
+        col_series = df[column]
+        # Skip empty columns to avoid IndexError on iloc[0]
+        if col_series.empty:
+            continue
+        # Find the first non-null value for type inspection
+        first_valid_index = col_series.first_valid_index()
+        first_value = col_series.loc[first_valid_index] if first_valid_index is not None else None
+        if isinstance(first_value, bytes):
+            if first_value.startswith(b"\x1f\x8b\x08\x00"):
+                df[column] = col_series.transform(
+                    lambda x: gzip.decompress(x).decode('utf-8')
+                    if isinstance(x, (bytes, bytearray)) and x
+                    else ''
+                )
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '')
+                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '')
-        if isinstance(df[column].iloc[0], bytes):
-            if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if x else '')
+        col_series = df[column]
+        # Skip empty columns to avoid IndexError on iloc[0]
+        if col_series.empty:
+            continue
+        # Find the first non-null value for type inspection
+        first_valid_index = col_series.first_valid_index()
+        first_value = col_series.loc[first_valid_index] if first_valid_index is not None else None
+        if isinstance(first_value, bytes):
+            if first_value.startswith(b"\x1f\x8b\x08\x00"):
+                df[column] = col_series.transform(
+                    lambda x: gzip.decompress(x).decode('utf-8')
+                    if isinstance(x, (bytes, bytearray)) and x
+                    else ''
+                )
+
+        if df[column].notna().any():
             df[column] = df[column].replace({r'^$': None}, regex=True)
             i = 0
-            while df[column][i] is None:
+            while pd.isna(df[column].iloc[i]):
                 i += 1
-            while pd.isna(df[column].iloc[i]):
-                i += 1
+            col_len = len(df[column])
+            while i < col_len and pd.isna(df[column].iloc[i]):
+                i += 1
+            if i == col_len:
+                # All values are NA after replacement; nothing to deserialize in this column
+                continue
-            while pd.isna(df[column].iloc[i]):
-                i += 1
+            col_len = len(df[column])
+            while i < col_len and pd.isna(df[column].iloc[i]):
+                i += 1
+            if i == col_len:
+                # All values are NA after replacement; nothing to deserialize in this column
+                continue
-            if isinstance(df[column][i], str):
-                if '"program":' in df[column][i][:20]:
-                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
+            if isinstance(df[column].iloc[i], str):
+                if '"program":' in df[column].iloc[i][:20]:
+                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if not pd.isna(x) else None)
                     if auto_gamma is True:
-                        if isinstance(df[column][i], list):
-                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
+                        if isinstance(df[column].iloc[i], list):
+                            df[column].apply(lambda x: [o.gm() if not pd.isna(o) else x for o in x])
                         else:
-                            df[column].apply(lambda x: x.gm() if x is not None else x)
+                            df[column].apply(lambda x: x.gm() if not pd.isna(x) else x)
+        # Convert NA values back to Python None for compatibility with `x is None` checks
+        if df[column].isna().any():
+            df[column] = df[column].astype(object).where(df[column].notna(), None)
     return df
 
 
 def _need_to_serialize(col):
     serialize = False
     i = 0
-    while i < len(col) and col[i] is None:
+    while i < len(col) and _is_null(col.iloc[i]):
         i += 1
     if i == len(col):
         return serialize
-    if isinstance(col[i], (Obs, Corr)):
+    if isinstance(col.iloc[i], (Obs, Corr)):
         serialize = True
-    elif isinstance(col[i], list):
-        if all(isinstance(o, Obs) for o in col[i]):
+    elif isinstance(col.iloc[i], list):
+        if all(isinstance(o, Obs) for o in col.iloc[i]):
             serialize = True
     return serialize
+
+
+def _is_null(val):
+    """Check if a value is null (None or NA), handling list/array values."""
+    return False if isinstance(val, (list, np.ndarray)) else pd.isna(val)
-    return False if isinstance(val, (list, np.ndarray)) else pd.isna(val)
+    # Treat empty lists/arrays (and containers whose elements are all null) as null.
+    if isinstance(val, list):
+        if len(val) == 0:
+            return True
+        # A list is null only if all its elements are null.
+        return all(_is_null(v) for v in val)
+
+    if isinstance(val, np.ndarray):
+        if val.size == 0:
+            return True
+        # For object-dtype arrays, check elementwise using _is_null.
+        if val.dtype == object:
+            return all(_is_null(v) for v in val)
+        # For non-object arrays, rely on pandas/numpy NA detection.
+        return bool(np.all(pd.isna(val)))
+
+    return pd.isna(val)
-    return False if isinstance(val, (list, np.ndarray)) else pd.isna(val)
+    # Treat empty lists/arrays (and containers whose elements are all null) as null.
+    if isinstance(val, list):
+        if len(val) == 0:
+            return True
+        # A list is null only if all its elements are null.
+        return all(_is_null(v) for v in val)
+
+    if isinstance(val, np.ndarray):
+        if val.size == 0:
+            return True
+        # For object-dtype arrays, check elementwise using _is_null.
+        if val.dtype == object:
+            return all(_is_null(v) for v in val)
+        # For non-object arrays, rely on pandas/numpy NA detection.
+        return bool(np.all(pd.isna(val)))
+
+    return pd.isna(val)