From e27bda7558fe69f0a8ec3973a26c5164fd1eff1f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 24 Jun 2026 23:35:27 +0200 Subject: [PATCH] Send the mDNS PTR record lifetime, not its remaining TTL The drawer derives its offline countdown as the record lifetime minus the last-seen age, so the value re-anchors in lockstep with last seen instead of tracking the PTR's remaining TTL; the browser refreshes that remaining TTL at ~80% of the lifetime, which drifts against the actively-probed A record and reads out of sync with last seen. Replaces the reachability snapshot's mdns_ptr_ttl_remaining_seconds with mdns_ptr_ttl_seconds (the device's own announced PTR lifetime) and drops devices/get_reachability; the countdown is computed client-side from data the subscription already pushes, so no poll is needed. --- docs/API.md | 3 +- .../controllers/_device_state_monitor/mdns.py | 17 ++++--- .../controllers/_reachability_tracker.py | 18 ++++--- .../controllers/devices/controller.py | 20 -------- esphome_device_builder/models/devices.py | 2 +- .../devices/test_subscribe_reachability.py | 48 ------------------- tests/test_reachability_tracker.py | 12 ++--- tests/test_state_monitor_reachability.py | 9 ++-- 8 files changed, 28 insertions(+), 101 deletions(-) diff --git a/docs/API.md b/docs/API.md index b9ee64ff1..a1a1bb8db 100644 --- a/docs/API.md +++ b/docs/API.md @@ -125,8 +125,7 @@ Connections that arrive on the trusted ingress site (HA add-on supervisor proxy) | `devices/ignore` | `{name, ignore?}` | — | Toggle device visibility | | `devices/validate` | `{configuration}` | Streaming | Validate YAML config | | `devices/logs` | `{configuration, port?: "OTA" \| serial, no_states?: bool}` | Streaming | Stream live device logs. `port` defaults to `"OTA"` (empty string is treated the same) — without a default, `esphome logs` falls into an interactive port-choice prompt when multiple targets are visible and the stdin-less subprocess crashes with `EOFError`. When `port` resolves to `"OTA"` the dashboard forwards its mDNS / DNS cache as `--mdns-address-cache` / `--dns-address-cache` so the CLI doesn't redo resolution the dashboard already has (legacy-dashboard parity with `build_cache_arguments`). | -| `devices/subscribe_reachability` | `{device_name}` | Streaming (`reachability_state`) | Drawer-only per-device reachability stream. Each `reachability_state` event carries the per-signal freshness snapshot (`mdns_last_seen_seconds_ago`, `mdns_ttl_remaining_seconds`, `mdns_ptr_ttl_remaining_seconds`, `mdns_txt_records`, `ping_last_seen_seconds_ago`, `ping_rtt_ms`, `mqtt_last_seen_seconds_ago`, plus `state` / `active_source` / `ip`). `mdns_ptr_ttl_remaining_seconds` is the PTR record's own remaining TTL — the countdown to the zeroconf `Removed` event that flips an mDNS-owned device OFFLINE (`null` when no PTR is cached). Pair with `devices/stop_stream`. | -| `devices/get_reachability` | `{device_name}` | `DeviceReachabilityData` | One-shot read of the same snapshot the subscribe stream seeds, including the live `mdns_ptr_ttl_remaining_seconds`. Exists because a pure PTR TTL refresh is deduped by zeroconf and never pushed, so a client that needs the current value must read it. `INVALID_MESSAGE` when `device_name` is empty; `NOT_FOUND` when no configured device matches. | +| `devices/subscribe_reachability` | `{device_name}` | Streaming (`reachability_state`) | Drawer-only per-device reachability stream. Each `reachability_state` event carries the per-signal freshness snapshot (`mdns_last_seen_seconds_ago`, `mdns_ttl_remaining_seconds`, `mdns_ptr_ttl_seconds`, `mdns_txt_records`, `ping_last_seen_seconds_ago`, `ping_rtt_ms`, `mqtt_last_seen_seconds_ago`, plus `state` / `active_source` / `ip`). `mdns_ptr_ttl_seconds` is the PTR record's full announced lifetime (mDNS default ~4500s, `null` when no PTR is cached); the drawer's offline countdown is that lifetime measured from `mdns_last_seen_seconds_ago`, so it stays in lockstep with "last seen". Pair with `devices/stop_stream`. | `Device.state`: `DeviceState` — `unknown`, `online`, or `offline` (discovered via mDNS + ping). `Device.has_pending_changes`: `true` = config changed since last compile, `false` = up to date, `null` = never compiled. diff --git a/esphome_device_builder/controllers/_device_state_monitor/mdns.py b/esphome_device_builder/controllers/_device_state_monitor/mdns.py index 4ceb5d3da..b241d2c73 100644 --- a/esphome_device_builder/controllers/_device_state_monitor/mdns.py +++ b/esphome_device_builder/controllers/_device_state_monitor/mdns.py @@ -213,18 +213,17 @@ def get_mdns_cache_info(self, name: str) -> MdnsCacheInfo | None: # into 0.108 and render as "TTL: 0s". age_s = max(0.0, millis_to_seconds(now_ms - latest.created)) ttl_remaining_s = max(0.0, float(latest.get_remaining_ttl(now_ms))) - # The PTR's own TTL — what ``AsyncServiceBrowser`` counts - # down to fire ``Removed`` (OFFLINE). Surfaced separately so - # the drawer can show a "goes offline in N" countdown; the - # union TTL above tracks the freshest record (usually the - # ~120s A the refresh loop renews), not the offline horizon. - ptr_ttl_remaining_s = ( - max(0.0, float(ptr.get_remaining_ttl(now_ms))) if ptr is not None else None - ) + # The PTR's full announced TTL (the device's own record + # lifetime). The drawer's "offline in N" countdown is this + # lifetime measured from ``age_seconds`` so it stays in + # lockstep with "last seen"; the PTR's *remaining* TTL is + # refreshed by the browser at ~80% of the lifetime and would + # drift against the actively-probed A record. + ptr_ttl_s = float(ptr.ttl) if ptr is not None else None return MdnsCacheInfo( age_seconds=age_s, ttl_remaining_seconds=ttl_remaining_s, - ptr_ttl_remaining_seconds=ptr_ttl_remaining_s, + ptr_ttl_seconds=ptr_ttl_s, txt_records=_decode_mdns_txt_records(txt_dns_records), ) diff --git a/esphome_device_builder/controllers/_reachability_tracker.py b/esphome_device_builder/controllers/_reachability_tracker.py index 360d07c44..534f295f7 100644 --- a/esphome_device_builder/controllers/_reachability_tracker.py +++ b/esphome_device_builder/controllers/_reachability_tracker.py @@ -52,12 +52,10 @@ class MdnsCacheInfo: refresh counts. ``ttl_remaining_seconds`` = the same record's :meth:`DNSRecord.get_remaining_ttl`. - ``ptr_ttl_remaining_seconds`` = the PTR record's own - remaining TTL, i.e. seconds until ``AsyncServiceBrowser`` - fires ``Removed`` and the device flips OFFLINE; ``None`` - when no PTR is cached. Distinct from the union TTL above, - which tracks whichever record is freshest (usually the - ~120s A record the refresh loop renews). + ``ptr_ttl_seconds`` = the PTR record's full announced TTL + (the device's own record lifetime, mDNS default ~4500s); + ``None`` when no PTR is cached. The drawer's "offline in N" + countdown is this minus ``age_seconds``. ``txt_records`` = parsed ``key -> value`` pairs from the device's TXT record, sorted alphabetically for deterministic wire output. @@ -72,7 +70,7 @@ class MdnsCacheInfo: age_seconds: float ttl_remaining_seconds: float - ptr_ttl_remaining_seconds: float | None = None + ptr_ttl_seconds: float | None = None txt_records: dict[str, str] = field(default_factory=dict) @@ -178,7 +176,7 @@ def _ago(timestamp: float | None) -> float | None: mdns_age: float | None = None mdns_ttl_remaining: float | None = None - mdns_ptr_ttl_remaining: float | None = None + mdns_ptr_ttl: float | None = None # ``None`` means "hide the TXT section" — collapses # both "no TXT cached" and "TXT cached but no useful # keys decoded" so the renderer is a single @@ -189,7 +187,7 @@ def _ago(timestamp: float | None) -> float | None: if info is not None: mdns_age = info.age_seconds mdns_ttl_remaining = info.ttl_remaining_seconds - mdns_ptr_ttl_remaining = info.ptr_ttl_remaining_seconds + mdns_ptr_ttl = info.ptr_ttl_seconds # Fresh dict on the wire so downstream mutation # can't reach into zeroconf's internals. mdns_txt_records = dict(info.txt_records) if info.txt_records else None @@ -201,7 +199,7 @@ def _ago(timestamp: float | None) -> float | None: "ip": ip, "mdns_last_seen_seconds_ago": mdns_age, "mdns_ttl_remaining_seconds": mdns_ttl_remaining, - "mdns_ptr_ttl_remaining_seconds": mdns_ptr_ttl_remaining, + "mdns_ptr_ttl_seconds": mdns_ptr_ttl, "mdns_txt_records": mdns_txt_records, "ping_last_seen_seconds_ago": _ago(self._ping_last_seen.get(name)), "mqtt_last_seen_seconds_ago": _ago(self._mqtt_last_seen.get(name)), diff --git a/esphome_device_builder/controllers/devices/controller.py b/esphome_device_builder/controllers/devices/controller.py index 0e211f4c8..712fda4a0 100644 --- a/esphome_device_builder/controllers/devices/controller.py +++ b/esphome_device_builder/controllers/devices/controller.py @@ -938,26 +938,6 @@ async def subscribe_reachability( self, device_name=device_name, client=client, message_id=message_id ) - @api_command("devices/get_reachability") - async def get_reachability( - self, - *, - device_name: str, - **kwargs: Any, - ) -> DeviceReachabilityData: - """ - One-shot read of the reachability snapshot. - - ``INVALID_MESSAGE`` when *device_name* is empty, ``NOT_FOUND`` - when no configured device matches. - """ - if not device_name: - raise CommandError(ErrorCode.INVALID_MESSAGE, "device_name is required") - snapshot = self.get_reachability_snapshot(device_name) - if snapshot is None: - raise CommandError(ErrorCode.NOT_FOUND, f"No configured device named {device_name!r}") - return snapshot - async def _reachability_refresh_loop(self, device_name: str) -> None: await reachability.refresh_loop(self, device_name) diff --git a/esphome_device_builder/models/devices.py b/esphome_device_builder/models/devices.py index 2a0fa993d..3fd43c654 100644 --- a/esphome_device_builder/models/devices.py +++ b/esphome_device_builder/models/devices.py @@ -346,7 +346,7 @@ class DeviceReachabilityData(TypedDict): ip: str mdns_last_seen_seconds_ago: float | None mdns_ttl_remaining_seconds: float | None - mdns_ptr_ttl_remaining_seconds: float | None + mdns_ptr_ttl_seconds: float | None mdns_txt_records: dict[str, str] | None ping_last_seen_seconds_ago: float | None mqtt_last_seen_seconds_ago: float | None diff --git a/tests/controllers/devices/test_subscribe_reachability.py b/tests/controllers/devices/test_subscribe_reachability.py index 260328be4..f43a17cc0 100644 --- a/tests/controllers/devices/test_subscribe_reachability.py +++ b/tests/controllers/devices/test_subscribe_reachability.py @@ -609,51 +609,3 @@ async def fast_sleep(_: float) -> None: await controller._reachability_refresh_loop("kitchen") state_monitor.refresh_mdns.assert_not_awaited() - - -async def test_get_reachability_returns_current_snapshot( - tmp_path: Path, make_controller: MakeControllerFactory -) -> None: - """The one-shot poll command returns the same snapshot subscribe seeds.""" - controller = make_controller(tmp_path) - tracker = ReachabilityTracker() - bus = EventBus() - _wire_reachability(controller, tracker, bus) - _seed_device(controller) - tracker.observe("kitchen", "ping") - - snap = await controller.get_reachability(device_name="kitchen") - assert snap["device"] == "kitchen" - assert snap["ping_last_seen_seconds_ago"] is not None - # Same wire shape as the subscribe seed (PTR-TTL field included). - assert snap.keys() == controller.get_reachability_snapshot("kitchen").keys() - assert "mdns_ptr_ttl_remaining_seconds" in snap - - -async def test_get_reachability_unknown_device_raises_not_found( - tmp_path: Path, make_controller: MakeControllerFactory -) -> None: - """Unknown ``device_name`` surfaces as a typed NOT_FOUND.""" - controller = make_controller(tmp_path) - tracker = ReachabilityTracker() - bus = EventBus() - _wire_reachability(controller, tracker, bus) - controller._scanner.get_by_name = lambda _name: [] - - with pytest.raises(CommandError) as exc: - await controller.get_reachability(device_name="nope") - assert exc.value.code == ErrorCode.NOT_FOUND - - -async def test_get_reachability_missing_device_name_raises( - tmp_path: Path, make_controller: MakeControllerFactory -) -> None: - """Empty ``device_name`` surfaces as a typed INVALID_MESSAGE, mirroring subscribe.""" - controller = make_controller(tmp_path) - tracker = ReachabilityTracker() - bus = EventBus() - _wire_reachability(controller, tracker, bus) - - with pytest.raises(CommandError) as exc: - await controller.get_reachability(device_name="") - assert exc.value.code == ErrorCode.INVALID_MESSAGE diff --git a/tests/test_reachability_tracker.py b/tests/test_reachability_tracker.py index 9e963c8f0..fd90247f1 100644 --- a/tests/test_reachability_tracker.py +++ b/tests/test_reachability_tracker.py @@ -54,7 +54,7 @@ def test_snapshot_empty_returns_all_nulls() -> None: "ip": "", "mdns_last_seen_seconds_ago": None, "mdns_ttl_remaining_seconds": None, - "mdns_ptr_ttl_remaining_seconds": None, + "mdns_ptr_ttl_seconds": None, "mdns_txt_records": None, "ping_last_seen_seconds_ago": None, "mqtt_last_seen_seconds_ago": None, @@ -73,7 +73,7 @@ def test_snapshot_uses_mdns_cache_reader() -> None: info = MdnsCacheInfo( age_seconds=12.4, ttl_remaining_seconds=107.6, - ptr_ttl_remaining_seconds=4321.0, + ptr_ttl_seconds=4500.0, ) tracker = ReachabilityTracker( mdns_cache_reader={"kitchen": info}.get, @@ -82,9 +82,9 @@ def test_snapshot_uses_mdns_cache_reader() -> None: snap = _snapshot(tracker) assert snap["mdns_last_seen_seconds_ago"] == 12.4 assert snap["mdns_ttl_remaining_seconds"] == 107.6 - # The PTR's own TTL — the drawer's "offline in N" countdown — - # is carried separately from the freshest-record union TTL. - assert snap["mdns_ptr_ttl_remaining_seconds"] == 4321.0 + # The PTR's full announced lifetime — the drawer derives the + # "offline in N" countdown from this minus the last-seen age. + assert snap["mdns_ptr_ttl_seconds"] == 4500.0 def test_snapshot_mdns_null_when_cache_reader_returns_none() -> None: @@ -94,7 +94,7 @@ def test_snapshot_mdns_null_when_cache_reader_returns_none() -> None: snap = _snapshot(tracker) assert snap["mdns_last_seen_seconds_ago"] is None assert snap["mdns_ttl_remaining_seconds"] is None - assert snap["mdns_ptr_ttl_remaining_seconds"] is None + assert snap["mdns_ptr_ttl_seconds"] is None assert snap["mdns_txt_records"] is None diff --git a/tests/test_state_monitor_reachability.py b/tests/test_state_monitor_reachability.py index eb33582fd..e83e95a79 100644 --- a/tests/test_state_monitor_reachability.py +++ b/tests/test_state_monitor_reachability.py @@ -712,10 +712,9 @@ def test_get_mdns_cache_info_picks_latest_across_record_types() -> None: assert info is not None # PTR (5s ago) is fresher than A (110s ago) → PTR wins. assert info.age_seconds == pytest.approx(5.0, abs=0.5) - # PTR TTL is carried separately as the offline-countdown - # horizon: 4500s TTL aged 5s → ~4495s remaining, distinct - # from the freshest-record union TTL above. - assert info.ptr_ttl_remaining_seconds == pytest.approx(4495.0, abs=1.0) + # PTR full announced lifetime (not remaining) — the drawer + # derives the offline countdown from this minus last-seen age. + assert info.ptr_ttl_seconds == 4500.0 finally: zc.close() @@ -1062,7 +1061,7 @@ def test_get_mdns_cache_info_picks_latest_record() -> None: assert info.age_seconds == pytest.approx(5.0, abs=0.5) assert info.ttl_remaining_seconds == pytest.approx(115.0, abs=0.5) # No PTR cached (lookup stubbed to None) → no offline-countdown horizon. - assert info.ptr_ttl_remaining_seconds is None + assert info.ptr_ttl_seconds is None async def test_refresh_mdns_no_zeroconf_is_a_noop() -> None: