diff --git a/pytest.ini b/pytest.ini index f209a8f..a4399f0 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] filterwarnings = ignore::pytest.PytestCollectionWarning -norecursedirs=tests/helpers +norecursedirs=tests/helpers tests/buildkite_test_collector/data diff --git a/src/buildkite_test_collector/pytest_plugin/buildkite_plugin.py b/src/buildkite_test_collector/pytest_plugin/buildkite_plugin.py index 4e39612..1b93e93 100644 --- a/src/buildkite_test_collector/pytest_plugin/buildkite_plugin.py +++ b/src/buildkite_test_collector/pytest_plugin/buildkite_plugin.py @@ -9,6 +9,22 @@ from .logger import logger from .failure_reasons import failure_reasons + +def _is_subtest_report(report): + """Detect SubtestReport from pytest>=9.0 built-in subtests. + + SubtestReport is a TestReport subclass with a ``context`` attribute + containing ``msg`` and ``kwargs``. We use duck-typing so the check + works without importing any specific class, keeping backwards + compatibility with older pytest versions that lack subtests support. + """ + return ( + hasattr(report, "context") + and hasattr(report.context, "msg") + and hasattr(report.context, "kwargs") + ) + + class BuildkitePlugin: """Buildkite test collector plugin for Pytest""" @@ -16,6 +32,10 @@ def __init__(self, payload): self.payload = payload self.in_flight = {} self.spans = {} + # Tracks nodeids whose in-flight result was set to failed by a + # SubtestReport. Used to prevent the parent test's "passed" + # call-phase report from overwriting the failure. + self._failed_by_subtest = set() def pytest_collection_modifyitems(self, config, items): """pytest_collection_modifyitems hook callback to filter tests by execution_tag markers""" @@ -53,6 +73,39 @@ def pytest_runtest_logreport(self, report): """pytest_runtest_logreport hook callback to get test outcome after test call""" logger.debug('hook=pytest_runtest_logreport nodeid=%s when=%s', report.nodeid, report.when) + # Handle SubtestReport objects (pytest>=9.0 built-in subtests). + # + # SubtestReport shares the parent test's nodeid, so without + # special handling each subtest's result overwrites the previous + # one in self.in_flight — a last-write-wins race. Worse, the + # parent test's own call-phase report arrives with outcome="passed" + # (exceptions inside subtests are swallowed by the context manager) + # and would overwrite any subtest failure. + # + # Strategy: propagate subtest *failures* to the parent's in-flight + # entry. Ignore passing/skipped subtests (they must not overwrite + # a previous failure). Guard against the parent's "passed" report + # overwriting a subtest-induced failure. + if _is_subtest_report(report) and report.when == "call": + if report.failed: + test_data = self.in_flight.get(report.nodeid) + if test_data: + failure_reason, failure_expanded = failure_reasons( + longrepr=report.longrepr + ) + logger.debug( + "-> subtest failed, propagating to parent: %s", + failure_reason, + ) + self.in_flight[report.nodeid] = test_data.failed( + failure_reason=failure_reason, + failure_expanded=failure_expanded, + ) + self._failed_by_subtest.add(report.nodeid) + else: + logger.debug("-> subtest passed/skipped, ignoring") + return + # This hook is called three times during the lifecycle of a test: # after the setup phase, the call phase, and the teardown phase. # We capture outcomes from the call phase, or setup/teardown phase if it failed @@ -61,6 +114,19 @@ def pytest_runtest_logreport(self, report): # See: https://github.com/buildkite/test-collector-python/pull/45 # See: https://github.com/buildkite/test-collector-python/issues/84 if report.when == 'call' or (report.when in ('setup', 'teardown') and report.failed): + # Guard: do not let the parent test's "passed" call-phase + # report overwrite a failure that was set by a SubtestReport. + if ( + report.when == "call" + and report.passed + and report.nodeid in self._failed_by_subtest + ): + logger.debug( + "-> parent call report is 'passed' but subtest(s) failed; " + "preserving subtest failure" + ) + return + self.update_test_result(report) # This hook only runs in xdist worker thread, not controller thread. @@ -140,6 +206,10 @@ def finalize_test(self, nodeid): test_data = test_data.finish() logger.debug('-> finalize_test nodeid=%s duration=%s', nodeid, test_data.history.duration) self.payload = self.payload.push_test_data(test_data) + + # Clean up subtest tracking state for this test. + self._failed_by_subtest.discard(nodeid) + return True def save_payload_as_json(self, path, merge=False): diff --git a/tests/buildkite_test_collector/data/test_sample_subtests.py b/tests/buildkite_test_collector/data/test_sample_subtests.py new file mode 100644 index 0000000..88739a8 --- /dev/null +++ b/tests/buildkite_test_collector/data/test_sample_subtests.py @@ -0,0 +1,31 @@ +"""Sample test file used by the integration test. + +This file exercises pytest>=9.0 built-in subtests and is run in a subprocess +by test_integration_subtests.py to verify the JSON output. +""" + + +def test_mixed_subtests(subtests): + """A test where some subtests pass and one fails.""" + with subtests.test(msg="passing check 1"): + assert 1 + 1 == 2 + + with subtests.test(msg="failing check"): + assert 1 + 1 == 3 # noqa: PLR0133 -- intentional failure + + with subtests.test(msg="passing check 2"): + assert 2 + 2 == 4 + + +def test_all_subtests_pass(subtests): + """A test where all subtests pass.""" + with subtests.test(msg="alpha"): + assert True + + with subtests.test(msg="beta"): + assert True + + +def test_no_subtests(): + """A plain test with no subtests — should be unaffected.""" + assert 42 == 42 diff --git a/tests/buildkite_test_collector/pytest_plugin/test_plugin_subtests.py b/tests/buildkite_test_collector/pytest_plugin/test_plugin_subtests.py new file mode 100644 index 0000000..9c494e4 --- /dev/null +++ b/tests/buildkite_test_collector/pytest_plugin/test_plugin_subtests.py @@ -0,0 +1,468 @@ +"""Tests for SubtestReport handling in the Buildkite test collector plugin. + +These tests verify that the plugin correctly handles SubtestReport objects +from pytest>=9.0 built-in subtests, preventing the last-write-wins overwrite +bug described in https://github.com/buildkite/test-collector-python/issues/93. +""" + +import dataclasses +from dataclasses import dataclass +from types import MappingProxyType +from typing import Mapping, Any, Optional + +import pytest + +from buildkite_test_collector.collector.payload import ( + Payload, + TestResultFailed, + TestResultPassed, +) +from buildkite_test_collector.pytest_plugin.buildkite_plugin import ( + BuildkitePlugin, + _is_subtest_report, +) +from _pytest.reports import TestReport + + +# --------------------------------------------------------------------------- +# Helpers: lightweight SubtestReport stand-in +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class SubtestContext: + """Mimics pytest.SubtestContext (pytest>=9.0).""" + + msg: Optional[str] = None + kwargs: Mapping[str, Any] = dataclasses.field(default_factory=lambda: MappingProxyType({})) + + +class FakeSubtestReport(TestReport): + """A TestReport with a ``context`` attribute, matching the duck-type + signature of ``pytest.SubtestReport``. + + We intentionally avoid importing ``pytest.SubtestReport`` so that the + tests can run on pytest<9.0 as well. The plugin uses duck-typing, so + this fake is sufficient. + """ + + def __init__(self, *, nodeid, outcome, longrepr=None, msg=None, **kwargs): + location = ("test_file.py", 0, "test_func") + super().__init__( + nodeid=nodeid, + location=location, + keywords={}, + outcome=outcome, + longrepr=longrepr, + when="call", + ) + self.context = SubtestContext(msg=msg, kwargs=MappingProxyType(kwargs)) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +NODEID = "tests/test_example.py::test_with_subtests" +LOCATION = ("tests/test_example.py", 10, "test_with_subtests") + + +@pytest.fixture +def plugin(fake_env): + """A fresh BuildkitePlugin with the parent test already in-flight.""" + payload = Payload.init(fake_env) + p = BuildkitePlugin(payload) + p.pytest_runtest_logstart(NODEID, LOCATION) + return p + + +# --------------------------------------------------------------------------- +# Detection helper +# --------------------------------------------------------------------------- + +class TestIsSubtestReport: + """Tests for _is_subtest_report() duck-type detection.""" + + def test_detects_fake_subtest_report(self): + report = FakeSubtestReport(nodeid=NODEID, outcome="passed", msg="hello") + assert _is_subtest_report(report) is True + + def test_rejects_plain_test_report(self): + report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + assert _is_subtest_report(report) is False + + def test_rejects_report_with_unrelated_context(self): + """An object with a ``context`` that lacks ``msg``/``kwargs``.""" + report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + report.context = "some string" # not the right shape + assert _is_subtest_report(report) is False + + +# --------------------------------------------------------------------------- +# Core behaviour +# --------------------------------------------------------------------------- + +class TestSubtestFailurePropagation: + """A failed SubtestReport must mark the parent test as failed.""" + + def test_single_subtest_failure_marks_parent_failed(self, plugin): + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="AssertionError: expected 1, got 2", + msg="check value", + ) + plugin.pytest_runtest_logreport(sub) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + assert test_data.result.failure_reason == "AssertionError: expected 1, got 2" + + def test_multiple_subtest_failures_keep_parent_failed(self, plugin): + """When multiple subtests fail, the parent stays failed.""" + sub1 = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="first failure", + msg="subtest 1", + ) + sub2 = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="second failure", + msg="subtest 2", + ) + plugin.pytest_runtest_logreport(sub1) + plugin.pytest_runtest_logreport(sub2) + + test_data = plugin.in_flight[NODEID] + # The important invariant: it is NOT marked "passed" + assert isinstance(test_data.result, TestResultFailed) + + +class TestPassedSubtestIgnored: + """Passing subtests must not overwrite the parent's result.""" + + def test_passed_subtest_does_not_set_result(self, plugin): + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="passed", + msg="check something", + ) + plugin.pytest_runtest_logreport(sub) + + test_data = plugin.in_flight[NODEID] + # Result should still be None (initial state), not TestResultPassed + assert test_data.result is None + + def test_passed_subtest_after_failure_preserves_failure(self, plugin): + fail = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="boom", + msg="bad subtest", + ) + ok = FakeSubtestReport( + nodeid=NODEID, + outcome="passed", + msg="good subtest", + ) + plugin.pytest_runtest_logreport(fail) + plugin.pytest_runtest_logreport(ok) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + assert test_data.result.failure_reason == "boom" + + +class TestSkippedSubtestIgnored: + """Skipped subtests must not overwrite the parent's result.""" + + def test_skipped_subtest_does_not_set_result(self, plugin): + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="skipped", + longrepr=("test_file.py", 10, "skipped"), + msg="skip this", + ) + plugin.pytest_runtest_logreport(sub) + + test_data = plugin.in_flight[NODEID] + assert test_data.result is None + + +class TestParentReportGuard: + """The parent test's own 'passed' call report must not overwrite a + subtest-induced failure.""" + + def test_parent_passed_does_not_overwrite_subtest_failure(self, plugin): + # Step 1: subtest fails + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="subtest assertion failed", + msg="failing subtest", + ) + plugin.pytest_runtest_logreport(sub) + + # Step 2: parent's own call-phase report arrives as "passed" + # (because the subtest context manager swallows the exception) + parent_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(parent_report) + + # The parent must still show as failed + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + assert test_data.result.failure_reason == "subtest assertion failed" + + def test_parent_failed_still_works_normally(self, plugin): + """If the parent test itself fails (not via subtests), that's fine.""" + parent_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="failed", + longrepr="parent body raised", + when="call", + ) + plugin.pytest_runtest_logreport(parent_report) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + assert test_data.result.failure_reason == "parent body raised" + + def test_teardown_failure_still_overrides(self, plugin): + """A teardown failure should still override a passed call result, + even when subtests are not involved.""" + call_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(call_report) + + teardown_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="failed", + longrepr="teardown exploded", + when="teardown", + ) + plugin.pytest_runtest_logreport(teardown_report) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + + def test_teardown_failure_overrides_subtest_failure(self, plugin): + """If subtests failed AND teardown failed, teardown failure wins + (consistent with existing behaviour where teardown overrides call).""" + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="subtest boom", + msg="bad", + ) + plugin.pytest_runtest_logreport(sub) + + # Parent call arrives as "passed" — blocked by guard + call_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(call_report) + + # Teardown fails — this should still go through (it's a real failure) + teardown_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="failed", + longrepr="teardown also exploded", + when="teardown", + ) + plugin.pytest_runtest_logreport(teardown_report) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + assert test_data.result.failure_reason == "teardown also exploded" + + +class TestFinalizeCleanup: + """finalize_test should clean up subtest tracking state.""" + + def test_finalize_clears_subtest_tracking(self, plugin): + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="boom", + msg="fail", + ) + plugin.pytest_runtest_logreport(sub) + assert NODEID in plugin._failed_by_subtest + + plugin.finalize_test(NODEID) + + assert NODEID not in plugin._failed_by_subtest + assert NODEID not in plugin.in_flight + + def test_finalize_without_subtests_is_noop_for_tracking(self, plugin): + """finalize_test works normally when no subtests were involved.""" + call_report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(call_report) + + assert NODEID not in plugin._failed_by_subtest + result = plugin.finalize_test(NODEID) + assert result is True + assert NODEID not in plugin.in_flight + + +class TestEndToEndJsonOutput: + """Verify that the JSON output correctly reflects subtest failures.""" + + def test_failed_subtest_produces_failed_json_entry(self, plugin, tmp_path): + # Subtest fails + sub = FakeSubtestReport( + nodeid=NODEID, + outcome="failed", + longrepr="assert 1 == 2", + msg="value check", + ) + plugin.pytest_runtest_logreport(sub) + + # Parent "passes" + parent = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(parent) + + # Finalize + plugin.finalize_test(NODEID) + + # Write JSON + path = tmp_path / "results.json" + plugin.save_payload_as_json(str(path)) + + import json + results = json.loads(path.read_text()) + + assert len(results) == 1 + entry = results[0] + assert entry["result"] == "failed" + assert entry["failure_reason"] == "assert 1 == 2" + assert entry["scope"] == "tests/test_example.py" + assert entry["name"] == "test_with_subtests" + + def test_all_subtests_pass_produces_passed_json_entry(self, plugin, tmp_path): + # Two passing subtests + for msg in ("check A", "check B"): + sub = FakeSubtestReport(nodeid=NODEID, outcome="passed", msg=msg) + plugin.pytest_runtest_logreport(sub) + + # Parent passes + parent = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(parent) + + plugin.finalize_test(NODEID) + + path = tmp_path / "results.json" + plugin.save_payload_as_json(str(path)) + + import json + results = json.loads(path.read_text()) + + assert len(results) == 1 + assert results[0]["result"] == "passed" + + +class TestNoSubtestsRegression: + """Verify zero behaviour change when subtests are not used.""" + + def test_simple_pass_unchanged(self, plugin): + report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="passed", + longrepr=None, + when="call", + ) + plugin.pytest_runtest_logreport(report) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultPassed) + + def test_simple_fail_unchanged(self, plugin): + report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="failed", + longrepr="plain failure", + when="call", + ) + plugin.pytest_runtest_logreport(report) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) + assert test_data.result.failure_reason == "plain failure" + + def test_setup_failure_unchanged(self, plugin): + report = TestReport( + nodeid=NODEID, + location=LOCATION, + keywords={}, + outcome="failed", + longrepr="fixture exploded", + when="setup", + ) + plugin.pytest_runtest_logreport(report) + + test_data = plugin.in_flight[NODEID] + assert isinstance(test_data.result, TestResultFailed) diff --git a/tests/buildkite_test_collector/test_integration_subtests.py b/tests/buildkite_test_collector/test_integration_subtests.py new file mode 100644 index 0000000..10d8069 --- /dev/null +++ b/tests/buildkite_test_collector/test_integration_subtests.py @@ -0,0 +1,129 @@ +"""Integration test: run a real pytest subprocess with subtests and verify JSON output. + +This mirrors the existing test_integration.py pattern in the repo — it +invokes pytest in a subprocess with ``--json=`` and inspects the +resulting JSON file. + +Requires pytest>=9.0 for built-in subtests support. The test is +automatically skipped on older pytest versions. +""" + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + + +SAMPLE_FILE = Path(__file__).parent / "data" / "test_sample_subtests.py" + +# Check if pytest>=9.0 is available (needed for built-in subtests) +_pytest_version = tuple(int(x) for x in pytest.__version__.split(".")[:2]) +_has_builtin_subtests = _pytest_version >= (9, 0) + + +@pytest.mark.skipif( + not _has_builtin_subtests, + reason=f"pytest>=9.0 required for built-in subtests (have {pytest.__version__})", +) +class TestSubtestIntegration: + """End-to-end: pytest subprocess -> collector -> JSON file -> assertions.""" + + def _run_pytest(self, tmp_path, *extra_args): + json_output = tmp_path / "results.json" + cmd = [ + sys.executable, + "-m", + "pytest", + str(SAMPLE_FILE), + f"--json={json_output}", + "-v", + *extra_args, + ] + result = subprocess.run(cmd, capture_output=True, text=True) + return result, json_output + + def test_mixed_subtests_reported_as_failed(self, tmp_path): + """A test with mixed pass/fail subtests must appear as 'failed'.""" + result, json_output = self._run_pytest(tmp_path, "-k", "test_mixed_subtests") + + # pytest itself should exit non-zero (there is a subtest failure) + assert result.returncode != 0, f"Expected failure, got:\n{result.stdout}" + + assert json_output.exists(), f"JSON not created. stderr:\n{result.stderr}" + data = json.loads(json_output.read_text()) + + tests_by_name = {t["name"]: t for t in data} + assert "test_mixed_subtests" in tests_by_name + + entry = tests_by_name["test_mixed_subtests"] + assert entry["result"] == "failed", ( + f"Expected 'failed' but got '{entry['result']}'. " + "The subtest failure was not propagated to the parent test." + ) + assert entry["failure_reason"] is not None + + def test_all_subtests_pass_reported_as_passed(self, tmp_path): + """A test where all subtests pass should appear as 'passed'.""" + result, json_output = self._run_pytest(tmp_path, "-k", "test_all_subtests_pass") + + assert result.returncode == 0, f"Unexpected failure:\n{result.stdout}" + assert json_output.exists() + + data = json.loads(json_output.read_text()) + tests_by_name = {t["name"]: t for t in data} + assert "test_all_subtests_pass" in tests_by_name + + entry = tests_by_name["test_all_subtests_pass"] + assert entry["result"] == "passed" + + def test_plain_test_unaffected(self, tmp_path): + """A test with no subtests should work exactly as before.""" + result, json_output = self._run_pytest(tmp_path, "-k", "test_no_subtests") + + assert result.returncode == 0, f"Unexpected failure:\n{result.stdout}" + assert json_output.exists() + + data = json.loads(json_output.read_text()) + tests_by_name = {t["name"]: t for t in data} + assert "test_no_subtests" in tests_by_name + + entry = tests_by_name["test_no_subtests"] + assert entry["result"] == "passed" + + def test_full_run_correct_count_and_results(self, tmp_path): + """Run all three sample tests. Verify counts and per-test results.""" + result, json_output = self._run_pytest(tmp_path) + + # One test fails, so pytest exits non-zero + assert result.returncode != 0 + + data = json.loads(json_output.read_text()) + tests_by_name = {t["name"]: t for t in data} + + # Should have exactly 3 test entries (one per test function) + assert len(data) == 3, ( + f"Expected 3 entries, got {len(data)}: {list(tests_by_name.keys())}" + ) + + assert tests_by_name["test_mixed_subtests"]["result"] == "failed" + assert tests_by_name["test_all_subtests_pass"]["result"] == "passed" + assert tests_by_name["test_no_subtests"]["result"] == "passed" + + def test_json_nodeids_are_real_pytest_nodeids(self, tmp_path): + """The scope::name in JSON must be a valid pytest nodeid — no + synthetic subtest names that would break bktec retries.""" + result, json_output = self._run_pytest(tmp_path) + + data = json.loads(json_output.read_text()) + + for entry in data: + # Reconstructed nodeid must match standard pytest format + reconstructed = f"{entry['scope']}::{entry['name']}" + assert "::" in reconstructed + # Must not contain subtest message fragments + assert "[" not in entry["name"], ( + f"Synthetic subtest name leaked into JSON: {entry['name']}" + ) + assert "()" not in entry["name"]