From 233f54625e23ee98aeb04d16866ed4427c90b5ac Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Wed, 10 Dec 2025 15:40:13 +0100 Subject: [PATCH 1/8] better handling of circular references # Conflicts: # src/py_avro_schema/_schemas.py # tests/test_plain_class.py --- src/py_avro_schema/_schemas.py | 169 ++++++++++++++++++++++++++++----- tests/test_plain_class.py | 29 +++++- 2 files changed, 172 insertions(+), 26 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index ad21090..f05fdce 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -26,6 +26,7 @@ import re import sys import types +import typing import uuid from enum import StrEnum from typing import ( @@ -214,6 +215,7 @@ def schema( namespace: Optional[str] = None, names: Optional[NamesType] = None, options: Option = Option(0), + processing: set[type] | None = None, ) -> JSONType: """ Generate and return an Avro schema for a given Python type @@ -228,12 +230,17 @@ def schema( """ if names is None: names = [] - schema_obj = _schema_obj(py_type, namespace=namespace, options=options) + schema_obj = _schema_obj(py_type, namespace=namespace, options=options, processing=processing) schema_data = schema_obj.data(names=names) return schema_data -def _schema_obj(py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)) -> "Schema": +def _schema_obj( + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, +) -> "Schema": """ Dispatch to relevant schema classes @@ -241,10 +248,11 @@ def _schema_obj(py_type: Type, namespace: Optional[str] = None, options: Option :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" + processing = processing or set() # Find concrete Schema subclasses defined in the current module for schema_class in sorted(_SCHEMA_CLASSES, key=lambda c: getattr(c, "__py_avro_priority", 0)): # Find the first schema class that handles py_type - schema_obj = schema_class(py_type, namespace=namespace, options=options) # type: ignore + schema_obj = schema_class(py_type, namespace=namespace, options=options, processing=processing) # type: ignore if schema_obj: return schema_obj raise TypeNotSupportedError(f"Cannot generate Avro schema for Python type {py_type}") @@ -274,7 +282,13 @@ def validate_name(value: str) -> str: class Schema(abc.ABC): """Schema base""" - def __new__(cls, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __new__( + cls, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ Create an instance of this schema class if it handles py_type @@ -287,17 +301,25 @@ def __new__(cls, py_type: Type, namespace: Optional[str] = None, options: Option else: return None - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ A schema base :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. + :param processing: Internal parameter to track types currently being processed (for circular dependencies). """ self.py_type = py_type self.options = options self._namespace = namespace # Namespace override + self.processing = processing or set() @property def namespace_override(self) -> Optional[str]: @@ -428,7 +450,13 @@ def data(self, names: NamesType) -> JSONObj: class LiteralSchema(Schema): """An Avro schema of any type for a Python Literal type, e.g. 
``Literal[""]``""" - def __init__(self, py_type: Type[Any], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[Any], + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """ An Avro schema of any type for a Python Literal type, e.g. ``Literal[""]`` @@ -462,7 +490,13 @@ def data(self, names: NamesType) -> JSONType: class FinalSchema(Schema): """An Avro schema for Python ``typing.Final``""" - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """An Avro schema for Python ``typing.Final``""" super().__init__(py_type, namespace, options) py_type = _type_from_annotated(py_type) @@ -757,6 +791,7 @@ def __init__( py_type: Type[collections.abc.MutableSequence], namespace: Optional[str] = None, options: Option = Option(0), + **kwargs, ): """ An Avro array schema for a given Python sequence @@ -804,6 +839,7 @@ def __init__( py_type: type[collections.abc.MutableSet], namespace: str | None = None, options: Option = Option(0), + **kwargs, ): """ An Avro array schema for a given Python sequence @@ -833,6 +869,7 @@ def __init__( py_type: Type[collections.abc.MutableMapping], namespace: Optional[str] = None, options: Option = Option(0), + processing: set[type] | None = None, ): """ An Avro map schema for a given Python mapping @@ -840,8 +877,9 @@ def __init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. + :param processing: Internal parameter to track types currently being processed (for circular dependencies). 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) args = get_args(py_type) if args[0] != str and not issubclass(args[0], StrEnum): @@ -879,7 +917,13 @@ def handles_type(cls, py_type: Type) -> bool: return origin == Union or origin == union_type return origin == Union - def __init__(self, py_type: Type[Union[Any]], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[Union[Any]], + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """ An Avro union schema for a given Python union type @@ -976,7 +1020,13 @@ def make_default(self, py_default: Any) -> JSONType: class NamedSchema(Schema): """A named Avro schema base class""" - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ A named Avro schema base class @@ -984,7 +1034,7 @@ def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Opti :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) self.name = py_type.__name__ @@ -1032,7 +1082,13 @@ def handles_type(cls, py_type: Type) -> bool: """Whether this schema class can represent a given Python class""" return _is_class(py_type, enum.Enum) - def __init__(self, py_type: Type[enum.Enum], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[enum.Enum], + namespace: Optional[str] = None, + options: Option = Option(0), + **kwargs, + ): """ An Avro enum schema for a Python enum with string values @@ -1098,15 +1154,25 @@ def data_before_deduplication(self, names: NamesType) -> JSONObj: class RecordSchema(NamedSchema): """An Avro record schema base class""" - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema base class :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. + :param processing: Internal parameter to track types currently being processed (for circular dependencies). 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) + self.processing = processing or set() + # Add this type to the processing set to detect circular dependencies + self.processing.add(py_type) self.record_fields: collections.abc.Sequence[RecordField] = [] def data_before_deduplication(self, names: NamesType) -> JSONObj: @@ -1127,6 +1193,7 @@ def data_before_deduplication(self, names: NamesType) -> JSONObj: record_schema["doc"] = doc if Option.ADD_REFERENCE_ID in self.options: record_schema["fields"].append({"name": REF_ID_KEY, "type": ["null", "long"], "default": None}) + self.processing.discard(self.py_type) return record_schema @@ -1142,6 +1209,7 @@ def __init__( default: Any = dataclasses.MISSING, docs: str = "", options: Option = Option(0), + processing: set[type] | None = None, ): """ An Avro record field @@ -1154,6 +1222,8 @@ def __init__( :param docs: Field documentation or description :param options: Schema generation options """ + if processing is None: + processing = set() if aliases is None: aliases = [] self.py_type = py_type @@ -1163,7 +1233,14 @@ def __init__( self.default = default self.docs = docs self.options = options - self.schema = _schema_obj(self.py_type, namespace=self._namespace, options=options) + + _type = self.py_type + # Check for circular dependency + if self.py_type in processing and hasattr(self.py_type, "__name__"): + # This is a circular reference - use a ForwardRef to break the cycle + _type = ForwardRef(py_type.__name__) # type: ignore + + self.schema = _schema_obj(_type, namespace=self._namespace, options=options, processing=processing) if self.default != dataclasses.MISSING: if isinstance(self.schema, UnionSchema): @@ -1214,7 +1291,13 @@ def handles_type(cls, py_type: Type) -> bool: py_type = _type_from_annotated(py_type) return dataclasses.is_dataclass(py_type) - def __init__(self, py_type: Type, namespace: Optional[str] 
= None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a given Python dataclass @@ -1222,7 +1305,7 @@ def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Opti :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. """ - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) self.py_fields = dataclasses.fields(py_type) self.record_fields = [self._record_field(field) for field in self.py_fields] @@ -1240,6 +1323,7 @@ def _record_field(self, py_field: dataclasses.Field) -> RecordField: default=default, aliases=aliases, options=self.options, + processing=self.processing, ) return field_obj @@ -1262,7 +1346,13 @@ def handles_type(cls, py_type: Type) -> bool: py_type = _type_from_annotated(py_type) return hasattr(py_type, "__pydantic_private__") - def __init__(self, py_type: Type[pydantic.BaseModel], namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type[pydantic.BaseModel], + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a given Pydantic model class @@ -1270,7 +1360,7 @@ def __init__(self, py_type: Type[pydantic.BaseModel], namespace: Optional[str] = :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) if Option.USE_CLASS_ALIAS in self.options: self.name = py_type.model_config.get("title") or self.name self.py_fields = py_type.model_fields @@ -1290,6 +1380,7 @@ def _record_field(self, name: str, py_field: pydantic.fields.FieldInfo) -> Recor aliases=aliases, docs=py_field.description or "", options=self.options, + processing=self.processing, ) return field_obj @@ -1336,10 +1427,16 @@ def handles_type(cls, py_type: Type) -> bool: # If we are subclassing a string, used the "named string" approach and (inspect.isclass(py_type) and not issubclass(py_type, str)) # and any other class with typed annotations - and bool(get_type_hints(py_type)) + and has_annotations(py_type) ) - def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: Optional[str] = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a plain Python class with type hints @@ -1347,7 +1444,7 @@ def __init__(self, py_type: Type, namespace: Optional[str] = None, options: Opti :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) # Try to get resolved type hints, but fall back to raw annotations if there are unresolved forward refs @@ -1372,6 +1469,7 @@ def _record_field(self, py_field: tuple[str, Type]) -> RecordField: default=default, aliases=aliases, options=self.options, + processing=self.processing, ) return field_obj @@ -1392,7 +1490,13 @@ def handles_type(cls, py_type: Type) -> bool: """Whether this schema can represent a TypedDict""" return is_typeddict(py_type) - def __init__(self, py_type: Type, namespace: str | None = None, options: Option = Option(0)): + def __init__( + self, + py_type: Type, + namespace: str | None = None, + options: Option = Option(0), + processing: set[type] | None = None, + ): """ An Avro record schema for a given Python TypedDict @@ -1400,7 +1504,7 @@ def __init__(self, py_type: Type, namespace: str | None = None, options: Option :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) self.is_total = py_type.__dict__.get("__total__", True) self.py_fields: dict[str, Type] = get_type_hints(py_type, include_extras=True) @@ -1437,6 +1541,7 @@ def _record_field(self, py_field: tuple[str, Type]) -> RecordField: aliases=aliases, default=default, options=self.options, + processing=self.processing, ) return field_obj @@ -1505,7 +1610,11 @@ def is_logically_json(py_type: Type) -> bool: return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) -def _is_class(py_type: Any, of_types: Union[Type, Tuple[Type, ...]], include_subclasses: bool = True) -> bool: +def _is_class( + py_type: Any, + of_types: Union[Type, Tuple[Type, ...]], + include_subclasses: bool = True, +) -> bool: """Return whether the given type is a (sub) class of a type or types""" py_type = _type_from_annotated(py_type) if include_subclasses: @@ -1530,6 +1639,16 @@ def _type_from_annotated(py_type: Type) -> Type: return py_type +def has_annotations(py_type: Type) -> bool: + """Checks if a type has annotations""" + py_type = _type_from_annotated(py_type) + try: + return bool(typing.get_type_hints(py_type)) + except Exception: + pass + return hasattr(py_type, "__annotations__") + + def _avro_name_for_type(py_type: Type) -> str: """ Generate an Avro-compatible name for a given Python type. It is used when wrapping container types (mostly lists diff --git a/tests/test_plain_class.py b/tests/test_plain_class.py index b814547..3506c39 100644 --- a/tests/test_plain_class.py +++ b/tests/test_plain_class.py @@ -10,7 +10,7 @@ # specific language governing permissions and limitations under the License. 
import re -from typing import Annotated, Final +from typing import Annotated, Final, ForwardRef import pytest @@ -201,3 +201,30 @@ class PyType: ], } assert_schema(PyType, expected, options=Option.ADD_REFERENCE_ID) + + +class PyType: + backend: ForwardRef("Backend") + value: str + + +class Backend: + py_type: PyType + + +def test_circular_dependencies(): + expected = { + "fields": [ + { + "name": "py_type", + "type": { + "fields": [{"name": "backend", "type": "Backend"}, {"name": "value", "type": "string"}], + "name": "PyType", + "type": "record", + }, + } + ], + "name": "Backend", + "type": "record", + } + assert_schema(Backend, expected) From c06293a3695013c7939c2aa4dd11d410c5db5fa7 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 11 Dec 2025 09:57:35 +0100 Subject: [PATCH 2/8] cleanup --- src/py_avro_schema/_schemas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index f05fdce..88d5fb6 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -26,7 +26,6 @@ import re import sys import types -import typing import uuid from enum import StrEnum from typing import ( @@ -1643,7 +1642,7 @@ def has_annotations(py_type: Type) -> bool: """Checks if a type has annotations""" py_type = _type_from_annotated(py_type) try: - return bool(typing.get_type_hints(py_type)) + return bool(get_type_hints(py_type)) except Exception: pass return hasattr(py_type, "__annotations__") From fdf7571a25940d40ee418265e470ad13cd6a4e02 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 11 Dec 2025 13:38:02 +0100 Subject: [PATCH 3/8] naming --- src/py_avro_schema/_schemas.py | 8 ++++---- tests/test_plain_class.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 88d5fb6..d15a8f6 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -313,7 +313,7 @@ def 
__init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. - :param processing: Internal parameter to track types currently being processed (for circular dependencies). + :param processing: Internal parameter to track types currently being processed (for circular references). """ self.py_type = py_type self.options = options @@ -876,7 +876,7 @@ def __init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. - :param processing: Internal parameter to track types currently being processed (for circular dependencies). + :param processing: Internal parameter to track types currently being processed (for circular references). """ super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) @@ -1166,11 +1166,11 @@ def __init__( :param py_type: The Python class to generate a schema for. :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. - :param processing: Internal parameter to track types currently being processed (for circular dependencies). + :param processing: Internal parameter to track types currently being processed (for circular references). 
""" super().__init__(py_type, namespace=namespace, options=options, processing=processing) self.processing = processing or set() - # Add this type to the processing set to detect circular dependencies + # Add this type to the processing set to detect circular references self.processing.add(py_type) self.record_fields: collections.abc.Sequence[RecordField] = [] diff --git a/tests/test_plain_class.py b/tests/test_plain_class.py index 3506c39..20ca8e8 100644 --- a/tests/test_plain_class.py +++ b/tests/test_plain_class.py @@ -212,7 +212,7 @@ class Backend: py_type: PyType -def test_circular_dependencies(): +def test_circular_references(): expected = { "fields": [ { From 35ce48d4bad3c9d13dfe70df3bb9cce9581cbfed Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 11 Dec 2025 15:35:04 +0100 Subject: [PATCH 4/8] get_origin might fall into infinite recusion --- src/py_avro_schema/_schemas.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index d15a8f6..97eb7b4 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -1606,7 +1606,10 @@ def _is_list_any(py_type: Type) -> bool: def is_logically_json(py_type: Type) -> bool: """Returns whether a given type is logically a JSON and can be serialized as such""" - return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) + try: + return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) + except RecursionError: + return False def _is_class( From 481f86b913d24ec74a89178011d70dfffdff6492 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Wed, 22 Apr 2026 18:29:05 +0200 Subject: [PATCH 5/8] picking up the PR after a while --- src/py_avro_schema/_schemas.py | 32 +++++++++++++------------- tests/test_typed_dict.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/src/py_avro_schema/_schemas.py 
b/src/py_avro_schema/_schemas.py index 97eb7b4..5cdd9ad 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -248,6 +248,10 @@ def _schema_obj( :param options: Schema generation options. """ processing = processing or set() + # If py_type is currently being processed further up the stack, emit a ForwardRef to break the cycle + unwrapped = _type_from_annotated(py_type) + if unwrapped in processing and hasattr(unwrapped, "__name__"): + py_type = ForwardRef(unwrapped.__name__) # type: ignore # Find concrete Schema subclasses defined in the current module for schema_class in sorted(_SCHEMA_CLASSES, key=lambda c: getattr(c, "__py_avro_priority", 0)): # Find the first schema class that handles py_type @@ -790,7 +794,7 @@ def __init__( py_type: Type[collections.abc.MutableSequence], namespace: Optional[str] = None, options: Option = Option(0), - **kwargs, + processing: set[type] | None = None, ): """ An Avro array schema for a given Python sequence @@ -799,10 +803,10 @@ def __init__( :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. """ - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) args = get_args(py_type) # TODO: validate if args has exactly 1 item? - self.items_schema = _schema_obj(args[0], namespace=namespace, options=options) + self.items_schema = _schema_obj(args[0], namespace=namespace, options=options, processing=self.processing) def data(self, names: NamesType) -> JSONType: """Return the schema data""" @@ -838,7 +842,7 @@ def __init__( py_type: type[collections.abc.MutableSet], namespace: str | None = None, options: Option = Option(0), - **kwargs, + processing: set[type] | None = None, ): """ An Avro array schema for a given Python sequence @@ -847,7 +851,7 @@ def __init__( :param namespace: The Avro namespace to add to schemas. 
:param options: Schema generation options. """ - super().__init__(py_type, namespace=namespace, options=options) # type: ignore + super().__init__(py_type, namespace=namespace, options=options, processing=processing) # type: ignore @register_schema @@ -883,7 +887,7 @@ def __init__( args = get_args(py_type) if args[0] != str and not issubclass(args[0], StrEnum): raise TypeError(f"Cannot generate Avro mapping schema for Python dictionary {py_type} with non-string keys") - self.values_schema = _schema_obj(args[1], namespace=namespace, options=options) + self.values_schema = _schema_obj(args[1], namespace=namespace, options=options, processing=self.processing) def data(self, names: NamesType) -> JSONType: """Return the schema data""" @@ -921,7 +925,7 @@ def __init__( py_type: Type[Union[Any]], namespace: Optional[str] = None, options: Option = Option(0), - **kwargs, + processing: set[type] | None = None, ): """ An Avro union schema for a given Python union type @@ -930,11 +934,13 @@ def __init__( :param namespace: The Avro namespace to add to schemas. :param options: Schema generation options. 
""" - super().__init__(py_type, namespace=namespace, options=options) + super().__init__(py_type, namespace=namespace, options=options, processing=processing) py_type = _type_from_annotated(py_type) args = get_args(py_type) self._validate_union(args) - self.item_schemas = [_schema_obj(arg, namespace=namespace, options=options) for arg in args] + self.item_schemas = [ + _schema_obj(arg, namespace=namespace, options=options, processing=self.processing) for arg in args + ] @staticmethod def _validate_union(args: tuple[Any, ...]) -> None: @@ -1233,13 +1239,7 @@ def __init__( self.docs = docs self.options = options - _type = self.py_type - # Check for circular dependency - if self.py_type in processing and hasattr(self.py_type, "__name__"): - # This is a circular reference - use a ForwardRef to break the cycle - _type = ForwardRef(py_type.__name__) # type: ignore - - self.schema = _schema_obj(_type, namespace=self._namespace, options=options, processing=processing) + self.schema = _schema_obj(self.py_type, namespace=self._namespace, options=options, processing=processing) if self.default != dataclasses.MISSING: if isinstance(self.schema, UnionSchema): diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py index 3ce9b73..560bc77 100644 --- a/tests/test_typed_dict.py +++ b/tests/test_typed_dict.py @@ -211,3 +211,45 @@ class PyType2(TypedDict): py_type = Union[PyType, PyType2] with pytest.raises(TypeError): py_avro_schema._schemas.schema(py_type) + + +ConfigurationList = list["Configuration"] + + +class Configuration(TypedDict): + Configurations: ConfigurationList | None + + +def test_recursive_reference(): + class PyType(TypedDict): + Configurations: ConfigurationList | None + + expected = { + "type": "record", + "name": "PyType", + "fields": [ + { + "name": "Configurations", + "type": [ + { + "type": "array", + "items": { + "type": "record", + "name": "Configuration", + "fields": [ + { + "name": "Configurations", + "type": [ + {"type": "array", "items": 
"Configuration"}, + "null", + ], + }, + ], + }, + }, + "null", + ], + }, + ], + } + assert_schema(PyType, expected) From 3b0f08f1b225b16cdb00acfefd15bfcbe03ed4b6 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 23 Apr 2026 12:40:55 +0200 Subject: [PATCH 6/8] Dealing better with wrappers --- src/py_avro_schema/_schemas.py | 37 +++++++----- tests/test_typed_dict.py | 101 +++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 15 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 5cdd9ad..b8de902 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -359,22 +359,27 @@ def make_default(self, py_default: Any) -> Any: """ return py_default - def _wrap_as_record(self, inner_schema: JSONObj, names: NamesType) -> JSONType: + def _wrap_as_record( + self, + names: NamesType, + build_inner: collections.abc.Callable[[NamesType], JSONObj], + ) -> JSONType: """ - Wrap a container schema (array or map) into an Avro record with ``__id`` and ``__data`` fields. - Handles deduplication via ``names``. + Wrap a container schema into an Avro record with ``__id`` and ``__data`` fields. The wrapper's + fullname is reserved in ``names`` before internal data is computed. This is to avoid a recursive inner type + to be expanded again (as the wrapper is). 
""" record_name = _avro_name_for_type(_type_from_annotated(self.py_type)) fullname = f"{self.namespace}.{record_name}" if self.namespace else record_name if fullname in names: return fullname names.append(fullname) - record_schema = { + record_schema: JSONObj = { "type": "record", "name": record_name, "fields": [ {"name": REF_ID_KEY, "type": ["null", "long"], "default": None}, - {"name": REF_DATA_KEY, "type": inner_schema}, + {"name": REF_DATA_KEY, "type": build_inner(names)}, ], } if self.namespace: @@ -810,10 +815,12 @@ def __init__( def data(self, names: NamesType) -> JSONType: """Return the schema data""" - array_schema = {"type": "array", "items": self.items_schema.data(names=names)} if Option.WRAP_INTO_RECORDS not in self.options: - return array_schema - return self._wrap_as_record(array_schema, names) + return {"type": "array", "items": self.items_schema.data(names=names)} + return self._wrap_as_record( + names, + lambda n: {"type": "array", "items": self.items_schema.data(names=n)}, + ) def make_default(self, py_default: collections.abc.Sequence) -> JSONType: """Return an Avro schema compliant default value for a given Python Sequence @@ -891,10 +898,12 @@ def __init__( def data(self, names: NamesType) -> JSONType: """Return the schema data""" - map_schema = {"type": "map", "values": self.values_schema.data(names=names)} if Option.WRAP_INTO_RECORDS not in self.options: - return map_schema - return self._wrap_as_record(map_schema, names) + return {"type": "map", "values": self.values_schema.data(names=names)} + return self._wrap_as_record( + names, + lambda n: {"type": "map", "values": self.values_schema.data(names=n)}, + ) def make_default(self, py_default: Any) -> JSONType: """Return an Avro schema compliant default value for a given Python value""" @@ -1175,9 +1184,8 @@ def __init__( :param processing: Internal parameter to track types currently being processed (for circular references). 
""" super().__init__(py_type, namespace=namespace, options=options, processing=processing) - self.processing = processing or set() - # Add this type to the processing set to detect circular references - self.processing.add(py_type) + # Per each record we copy the set, so sibling fields don't see each other as in progress. + self.processing = self.processing | {py_type} self.record_fields: collections.abc.Sequence[RecordField] = [] def data_before_deduplication(self, names: NamesType) -> JSONObj: @@ -1198,7 +1206,6 @@ def data_before_deduplication(self, names: NamesType) -> JSONObj: record_schema["doc"] = doc if Option.ADD_REFERENCE_ID in self.options: record_schema["fields"].append({"name": REF_ID_KEY, "type": ["null", "long"], "default": None}) - self.processing.discard(self.py_type) return record_schema diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py index 560bc77..e9ced1a 100644 --- a/tests/test_typed_dict.py +++ b/tests/test_typed_dict.py @@ -213,6 +213,58 @@ class PyType2(TypedDict): py_avro_schema._schemas.schema(py_type) +class SiblingInner(TypedDict): + x: str + + +class SiblingOuter(TypedDict): + a: SiblingInner + b: list[SiblingInner] + + +def test_sibling_fields_same_(): + expected = { + "type": "record", + "name": "SiblingOuter", + "namespace": "test_typed_dict", + "fields": [ + { + "name": "a", + "type": { + "type": "record", + "name": "SiblingInner", + "namespace": "test_typed_dict", + "fields": [{"name": "x", "type": "string"}], + }, + }, + { + "name": "b", + "type": { + "type": "record", + "name": "TestTypedDictSiblingInnerList", + "namespace": "builtins", + "fields": [ + {"name": "__id", "type": ["null", "long"], "default": None}, + { + "name": "__data", + "type": { + "type": "array", + "items": "test_typed_dict.SiblingInner", + }, + }, + ], + }, + }, + ], + } + assert_schema( + SiblingOuter, + expected, + options=pas.Option.WRAP_INTO_RECORDS, + do_auto_namespace=True, + ) + + ConfigurationList = list["Configuration"] @@ -221,6 
+273,8 @@ class Configuration(TypedDict): def test_recursive_reference(): + """Test simple recursive reference with no ``WRAP_INTO_RECORDS``.""" + class PyType(TypedDict): Configurations: ConfigurationList | None @@ -253,3 +307,50 @@ class PyType(TypedDict): ], } assert_schema(PyType, expected) + + +def test_recursive_reference_with_wrap_into_records(): + """Checks that a self-referential record combined with ``WRAP_INTO_RECORDS`` must define the wrapper once.""" + + class PyType(TypedDict): + Configurations: ConfigurationList | None + + expected = { + "type": "record", + "name": "PyType", + "fields": [ + { + "name": "Configurations", + "type": [ + { + "type": "record", + "name": "TestTypedDictConfigurationList", + "fields": [ + {"name": "__id", "type": ["null", "long"], "default": None}, + { + "name": "__data", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "Configuration", + "fields": [ + { + "name": "Configurations", + "type": [ + "TestTypedDictConfigurationList", + "null", + ], + }, + ], + }, + }, + }, + ], + }, + "null", + ], + }, + ], + } + assert_schema(PyType, expected, options=pas.Option.WRAP_INTO_RECORDS) From c0cb7e11e57121a48ae9c3135c849404535f7442 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 23 Apr 2026 15:01:15 +0200 Subject: [PATCH 7/8] cosmetic --- src/py_avro_schema/_schemas.py | 2 +- tests/test_typed_dict.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index b8de902..3112619 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -1184,7 +1184,7 @@ def __init__( :param processing: Internal parameter to track types currently being processed (for circular references). """ super().__init__(py_type, namespace=namespace, options=options, processing=processing) - # Per each record we copy the set, so sibling fields don't see each other as in progress. 
+ # Per each record we copy the set, to separate executions between siblings. self.processing = self.processing | {py_type} self.record_fields: collections.abc.Sequence[RecordField] = [] diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py index e9ced1a..646226f 100644 --- a/tests/test_typed_dict.py +++ b/tests/test_typed_dict.py @@ -222,7 +222,8 @@ class SiblingOuter(TypedDict): b: list[SiblingInner] -def test_sibling_fields_same_(): +def test_sibling_fields_references(): + """Check sibling attributes in a record won't all get a bare reference.""" expected = { "type": "record", "name": "SiblingOuter", From 1cf9a3a0dbd6cb06c0a33f778d9663421118d8b9 Mon Sep 17 00:00:00 2001 From: Giovanni Grano Date: Thu, 23 Apr 2026 15:47:47 +0200 Subject: [PATCH 8/8] extra handling of recursive refs with wraps and namespaces --- src/py_avro_schema/_schemas.py | 20 +++++++++--- tests/test_typed_dict.py | 59 +++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 3112619..abec206 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -234,6 +234,19 @@ def schema( return schema_data +def _fullname_for_forward_ref(py_type: Type, namespace: Optional[str], options: Option) -> str: + """Computes the fully-qualified name to be used in a ForwardRef to break cycles.""" + name = py_type.__name__ + if namespace is None and Option.NO_AUTO_NAMESPACE not in options: + module = inspect.getmodule(py_type) + if module and module.__name__ != "builtin": + if Option.AUTO_NAMESPACE_MODULE in options: + namespace = module.__name__ + else: + namespace = module.__name__.split(".", 1)[0] + return f"{namespace}.{name}" if namespace else name + + def _schema_obj( py_type: Type, namespace: Optional[str] = None, @@ -251,7 +264,7 @@ def _schema_obj( # If py_type is currently being processed further up the stack, emit a ForwardRef to break the cycle unwrapped =
_type_from_annotated(py_type) if unwrapped in processing and hasattr(unwrapped, "__name__"): - py_type = ForwardRef(unwrapped.__name__) # type: ignore + py_type = ForwardRef(_fullname_for_forward_ref(unwrapped, namespace, options)) # type: ignore # Find concrete Schema subclasses defined in the current module for schema_class in sorted(_SCHEMA_CLASSES, key=lambda c: getattr(c, "__py_avro_priority", 0)): # Find the first schema class that handles py_type @@ -1613,10 +1626,7 @@ def _is_list_any(py_type: Type) -> bool: def is_logically_json(py_type: Type) -> bool: """Returns whether a given type is logically a JSON and can be serialized as such""" - try: - return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) - except RecursionError: - return False + return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type) def _is_class( diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py index 646226f..8c78493 100644 --- a/tests/test_typed_dict.py +++ b/tests/test_typed_dict.py @@ -274,7 +274,7 @@ class Configuration(TypedDict): def test_recursive_reference(): - """Test simple recursive reference with no ``WRAP_INTO_RECORDS``.""" + """Test simple recursive reference with no wrapped records.""" class PyType(TypedDict): Configurations: ConfigurationList | None @@ -355,3 +355,60 @@ class PyType(TypedDict): ], } assert_schema(PyType, expected, options=pas.Option.WRAP_INTO_RECORDS) + + +RecExpressions = list["RecExpression"] + + +class RecExpression(TypedDict, total=False): + Or: RecExpressions | None + And: RecExpressions | None + Not: "RecExpression | None" + + +def test_recursive_reference_with_wrap_into_records_and_namespaces(): + """Checks that with WRAP_INTO_RECORDS and AUTO_NAMESPACE_MODULE a self-recursive record is referenced by + its fully-qualified name from inside the list wrapper. 
+ """ + expected = { + "type": "record", + "name": "RecExpression", + "namespace": "test_typed_dict", + "fields": [ + { + "name": "Or", + "type": [ + { + "type": "record", + "name": "TestTypedDictRecExpressionList", + "namespace": "builtins", + "fields": [ + {"name": "__id", "type": ["null", "long"], "default": None}, + { + "name": "__data", + "type": { + "type": "array", + "items": "test_typed_dict.RecExpression", + }, + }, + ], + }, + "null", + ], + }, + { + "name": "And", + "type": ["builtins.TestTypedDictRecExpressionList", "null"], + }, + { + "name": "Not", + "type": ["test_typed_dict.RecExpression", "null"], + }, + ], + } + assert_schema( + RecExpression, + expected, + options=pas.Option.WRAP_INTO_RECORDS, + do_auto_namespace=True, + )